diff --git a/Documentation/devicetree/bindings/pps/pps-gpio.txt b/Documentation/devicetree/bindings/pps/pps-gpio.txt index 40bf9c3564a552639cbf8c432b2c3c175d630601..0de23b79365729d376a97032d76231ad70aa9b38 100644 --- a/Documentation/devicetree/bindings/pps/pps-gpio.txt +++ b/Documentation/devicetree/bindings/pps/pps-gpio.txt @@ -13,8 +13,12 @@ Optional properties: Example: pps { - compatible = "pps-gpio"; - gpios = <&gpio2 6 0>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_pps>; + gpios = <&gpio1 26 GPIO_ACTIVE_HIGH>; assert-falling-edge; + + compatible = "pps-gpio"; + status = "okay"; }; diff --git a/Documentation/pps/pps.txt b/Documentation/pps/pps.txt index 1fdbd544721639a6dc7a48576e0e3f1066873729..99f5d8c4c6525b6e2bd462f5f2b14150424d1c02 100644 --- a/Documentation/pps/pps.txt +++ b/Documentation/pps/pps.txt @@ -48,12 +48,12 @@ problem: time_pps_create(). This implies that the source has a /dev/... entry. This assumption is -ok for the serial and parallel port, where you can do something +OK for the serial and parallel port, where you can do something useful besides(!) the gathering of timestamps as it is the central -task for a PPS-API. But this assumption does not work for a single +task for a PPS API. But this assumption does not work for a single purpose GPIO line. In this case even basic file-related functionality (like read() and write()) makes no sense at all and should not be a -precondition for the use of a PPS-API. +precondition for the use of a PPS API. The problem can be simply solved if you consider that a PPS source is not always connected with a GPS data source. @@ -88,13 +88,13 @@ Coding example -------------- To register a PPS source into the kernel you should define a struct -pps_source_info_s as follows: +pps_source_info as follows: static struct pps_source_info pps_ktimer_info = { .name = "ktimer", .path = "", - .mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | \ - PPS_ECHOASSERT | \ + .mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | + PPS_ECHOASSERT | PPS_CANWAIT | PPS_TSFMT_TSPEC, .echo = pps_ktimer_echo, .owner = THIS_MODULE, @@ -108,13 +108,13 @@ initialization routine as follows: The pps_register_source() prototype is: - int pps_register_source(struct pps_source_info_s *info, int default_params) + int pps_register_source(struct pps_source_info *info, int default_params) where "info" is a pointer to a structure that describes a particular PPS source, "default_params" tells the system what the initial default parameters for the device should be (it is obvious that these parameters must be a subset of ones defined in the struct -pps_source_info_s which describe the capabilities of the driver). +pps_source_info which describe the capabilities of the driver). Once you have registered a new PPS source into the system you can signal an assert event (for example in the interrupt handler routine) @@ -142,8 +142,10 @@ If the SYSFS filesystem is enabled in the kernel it provides a new class: Every directory is the ID of a PPS sources defined in the system and inside you find several files: - $ ls /sys/class/pps/pps0/ - assert clear echo mode name path subsystem@ uevent + $ ls -F /sys/class/pps/pps0/ + assert dev mode path subsystem@ + clear echo name power/ uevent + Inside each "assert" and "clear" file you can find the timestamp and a sequence number: @@ -154,32 +156,32 @@ sequence number: Where before the "#" is the timestamp in seconds; after it is the sequence number. Other files are: -* echo: reports if the PPS source has an echo function or not; + * echo: reports if the PPS source has an echo function or not; -* mode: reports available PPS functioning modes; + * mode: reports available PPS functioning modes; -* name: reports the PPS source's name; + * name: reports the PPS source's name; -* path: reports the PPS source's device path, that is the device the - PPS source is connected to (if it exists). + * path: reports the PPS source's device path, that is the device the + PPS source is connected to (if it exists). Testing the PPS support ----------------------- In order to test the PPS support even without specific hardware you can use -the ktimer driver (see the client subsection in the PPS configuration menu) +the pps-ktimer driver (see the client subsection in the PPS configuration menu) and the userland tools available in your distribution's pps-tools package, -http://linuxpps.org , or https://github.com/ago/pps-tools . +http://linuxpps.org , or https://github.com/redlab-i/pps-tools. -Once you have enabled the compilation of ktimer just modprobe it (if +Once you have enabled the compilation of pps-ktimer just modprobe it (if not statically compiled): - # modprobe ktimer + # modprobe pps-ktimer and the run ppstest as follow: - $ ./ppstest /dev/pps0 + $ ./ppstest /dev/pps1 trying PPS source "/dev/pps1" found PPS source "/dev/pps1" ok, found 1 source(s), now start fetching data... @@ -187,7 +189,7 @@ and the run ppstest as follow: source 0 - assert 1186592700.388931295, sequence: 365 - clear 0.000000000, sequence: 0 source 0 - assert 1186592701.389032765, sequence: 366 - clear 0.000000000, sequence: 0 -Please, note that to compile userland programs you need the file timepps.h . +Please note that to compile userland programs, you need the file timepps.h. This is available in the pps-tools repository mentioned above. diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt index b8a8c70b0188d8fd8c45b554f5ca121a0f3f0654..c42a21b9904616859e9c706caf64423d9e9d7ef6 100644 --- a/Documentation/rbtree.txt +++ b/Documentation/rbtree.txt @@ -193,6 +193,39 @@ Example:: for (node = rb_first(&mytree); node; node = rb_next(node)) printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring); +Cached rbtrees +-------------- + +Computing the leftmost (smallest) node is quite a common task for binary +search trees, such as for traversals or users relying on a the particular +order for their own logic. To this end, users can use 'struct rb_root_cached' +to optimize O(logN) rb_first() calls to a simple pointer fetch avoiding +potentially expensive tree iterations. This is done at negligible runtime +overhead for maintanence; albeit larger memory footprint. + +Similar to the rb_root structure, cached rbtrees are initialized to be +empty via: + + struct rb_root_cached mytree = RB_ROOT_CACHED; + +Cached rbtree is simply a regular rb_root with an extra pointer to cache the +leftmost node. This allows rb_root_cached to exist wherever rb_root does, +which permits augmented trees to be supported as well as only a few extra +interfaces: + + struct rb_node *rb_first_cached(struct rb_root_cached *tree); + void rb_insert_color_cached(struct rb_node *, struct rb_root_cached *, bool); + void rb_erase_cached(struct rb_node *node, struct rb_root_cached *); + +Both insert and erase calls have their respective counterpart of augmented +trees: + + void rb_insert_augmented_cached(struct rb_node *node, struct rb_root_cached *, + bool, struct rb_augment_callbacks *); + void rb_erase_augmented_cached(struct rb_node *, struct rb_root_cached *, + struct rb_augment_callbacks *); + + Support for Augmented rbtrees ----------------------------- diff --git a/Documentation/vm/hmm.txt b/Documentation/vm/hmm.txt new file mode 100644 index 0000000000000000000000000000000000000000..4d3aac9f4a5dcbe86df5d81e41dcc70b82a819a4 --- /dev/null +++ b/Documentation/vm/hmm.txt @@ -0,0 +1,384 @@ +Heterogeneous Memory Management (HMM) + +Transparently allow any component of a program to use any memory region of said +program with a device without using device specific memory allocator. This is +becoming a requirement to simplify the use of advance heterogeneous computing +where GPU, DSP or FPGA are use to perform various computations. + +This document is divided as follow, in the first section i expose the problems +related to the use of a device specific allocator. The second section i expose +the hardware limitations that are inherent to many platforms. The third section +gives an overview of HMM designs. The fourth section explains how CPU page- +table mirroring works and what is HMM purpose in this context. Fifth section +deals with how device memory is represented inside the kernel. Finaly the last +section present the new migration helper that allow to leverage the device DMA +engine. + + +1) Problems of using device specific memory allocator: +2) System bus, device memory characteristics +3) Share address space and migration +4) Address space mirroring implementation and API +5) Represent and manage device memory from core kernel point of view +6) Migrate to and from device memory +7) Memory cgroup (memcg) and rss accounting + + +------------------------------------------------------------------------------- + +1) Problems of using device specific memory allocator: + +Device with large amount of on board memory (several giga bytes) like GPU have +historically manage their memory through dedicated driver specific API. This +creates a disconnect between memory allocated and managed by device driver and +regular application memory (private anonymous, share memory or regular file +back memory). From here on i will refer to this aspect as split address space. +I use share address space to refer to the opposite situation ie one in which +any memory region can be use by device transparently. + +Split address space because device can only access memory allocated through the +device specific API. This imply that all memory object in a program are not +equal from device point of view which complicate large program that rely on a +wide set of libraries. + +Concretly this means that code that wants to leverage device like GPU need to +copy object between genericly allocated memory (malloc, mmap private/share/) +and memory allocated through the device driver API (this still end up with an +mmap but of the device file). + +For flat dataset (array, grid, image, ...) this isn't too hard to achieve but +complex data-set (list, tree, ...) are hard to get right. Duplicating a complex +data-set need to re-map all the pointer relations between each of its elements. +This is error prone and program gets harder to debug because of the duplicate +data-set. + +Split address space also means that library can not transparently use data they +are getting from core program or other library and thus each library might have +to duplicate its input data-set using specific memory allocator. Large project +suffer from this and waste resources because of the various memory copy. + +Duplicating each library API to accept as input or output memory allocted by +each device specific allocator is not a viable option. It would lead to a +combinatorial explosions in the library entry points. + +Finaly with the advance of high level language constructs (in C++ but in other +language too) it is now possible for compiler to leverage GPU or other devices +without even the programmer knowledge. Some of compiler identified patterns are +only do-able with a share address. It is as well more reasonable to use a share +address space for all the other patterns. + + +------------------------------------------------------------------------------- + +2) System bus, device memory characteristics + +System bus cripple share address due to few limitations. Most system bus only +allow basic memory access from device to main memory, even cache coherency is +often optional. Access to device memory from CPU is even more limited, most +often than not it is not cache coherent. + +If we only consider the PCIE bus than device can access main memory (often +through an IOMMU) and be cache coherent with the CPUs. However it only allows +a limited set of atomic operation from device on main memory. This is worse +in the other direction the CPUs can only access a limited range of the device +memory and can not perform atomic operations on it. Thus device memory can not +be consider like regular memory from kernel point of view. + +Another crippling factor is the limited bandwidth (~32GBytes/s with PCIE 4.0 +and 16 lanes). This is 33 times less that fastest GPU memory (1 TBytes/s). +The final limitation is latency, access to main memory from the device has an +order of magnitude higher latency than when the device access its own memory. + +Some platform are developing new system bus or additions/modifications to PCIE +to address some of those limitations (OpenCAPI, CCIX). They mainly allow two +way cache coherency between CPU and device and allow all atomic operations the +architecture supports. Saddly not all platform are following this trends and +some major architecture are left without hardware solutions to those problems. + +So for share address space to make sense not only we must allow device to +access any memory memory but we must also permit any memory to be migrated to +device memory while device is using it (blocking CPU access while it happens). + + +------------------------------------------------------------------------------- + +3) Share address space and migration + +HMM intends to provide two main features. First one is to share the address +space by duplication the CPU page table into the device page table so same +address point to same memory and this for any valid main memory address in +the process address space. + +To achieve this, HMM offer a set of helpers to populate the device page table +while keeping track of CPU page table updates. Device page table updates are +not as easy as CPU page table updates. To update the device page table you must +allow a buffer (or use a pool of pre-allocated buffer) and write GPU specifics +commands in it to perform the update (unmap, cache invalidations and flush, +...). This can not be done through common code for all device. Hence why HMM +provides helpers to factor out everything that can be while leaving the gory +details to the device driver. + +The second mechanism HMM provide is a new kind of ZONE_DEVICE memory that does +allow to allocate a struct page for each page of the device memory. Those page +are special because the CPU can not map them. They however allow to migrate +main memory to device memory using exhisting migration mechanism and everything +looks like if page was swap out to disk from CPU point of view. Using a struct +page gives the easiest and cleanest integration with existing mm mechanisms. +Again here HMM only provide helpers, first to hotplug new ZONE_DEVICE memory +for the device memory and second to perform migration. Policy decision of what +and when to migrate things is left to the device driver. + +Note that any CPU access to a device page trigger a page fault and a migration +back to main memory ie when a page backing an given address A is migrated from +a main memory page to a device page then any CPU access to address A trigger a +page fault and initiate a migration back to main memory. + + +With this two features, HMM not only allow a device to mirror a process address +space and keeps both CPU and device page table synchronize, but also allow to +leverage device memory by migrating part of data-set that is actively use by a +device. + + +------------------------------------------------------------------------------- + +4) Address space mirroring implementation and API + +Address space mirroring main objective is to allow to duplicate range of CPU +page table into a device page table and HMM helps keeping both synchronize. A +device driver that want to mirror a process address space must start with the +registration of an hmm_mirror struct: + + int hmm_mirror_register(struct hmm_mirror *mirror, + struct mm_struct *mm); + int hmm_mirror_register_locked(struct hmm_mirror *mirror, + struct mm_struct *mm); + +The locked variant is to be use when the driver is already holding the mmap_sem +of the mm in write mode. The mirror struct has a set of callback that are use +to propagate CPU page table: + + struct hmm_mirror_ops { + /* sync_cpu_device_pagetables() - synchronize page tables + * + * @mirror: pointer to struct hmm_mirror + * @update_type: type of update that occurred to the CPU page table + * @start: virtual start address of the range to update + * @end: virtual end address of the range to update + * + * This callback ultimately originates from mmu_notifiers when the CPU + * page table is updated. The device driver must update its page table + * in response to this callback. The update argument tells what action + * to perform. + * + * The device driver must not return from this callback until the device + * page tables are completely updated (TLBs flushed, etc); this is a + * synchronous call. + */ + void (*update)(struct hmm_mirror *mirror, + enum hmm_update action, + unsigned long start, + unsigned long end); + }; + +Device driver must perform update to the range following action (turn range +read only, or fully unmap, ...). Once driver callback returns the device must +be done with the update. + + +When device driver wants to populate a range of virtual address it can use +either: + int hmm_vma_get_pfns(struct vm_area_struct *vma, + struct hmm_range *range, + unsigned long start, + unsigned long end, + hmm_pfn_t *pfns); + int hmm_vma_fault(struct vm_area_struct *vma, + struct hmm_range *range, + unsigned long start, + unsigned long end, + hmm_pfn_t *pfns, + bool write, + bool block); + +First one (hmm_vma_get_pfns()) will only fetch present CPU page table entry and +will not trigger a page fault on missing or non present entry. The second one +do trigger page fault on missing or read only entry if write parameter is true. +Page fault use the generic mm page fault code path just like a CPU page fault. + +Both function copy CPU page table into their pfns array argument. Each entry in +that array correspond to an address in the virtual range. HMM provide a set of +flags to help driver identify special CPU page table entries. + +Locking with the update() callback is the most important aspect the driver must +respect in order to keep things properly synchronize. The usage pattern is : + + int driver_populate_range(...) + { + struct hmm_range range; + ... + again: + ret = hmm_vma_get_pfns(vma, &range, start, end, pfns); + if (ret) + return ret; + take_lock(driver->update); + if (!hmm_vma_range_done(vma, &range)) { + release_lock(driver->update); + goto again; + } + + // Use pfns array content to update device page table + + release_lock(driver->update); + return 0; + } + +The driver->update lock is the same lock that driver takes inside its update() +callback. That lock must be call before hmm_vma_range_done() to avoid any race +with a concurrent CPU page table update. + +HMM implements all this on top of the mmu_notifier API because we wanted to a +simpler API and also to be able to perform optimization latter own like doing +concurrent device update in multi-devices scenario. + +HMM also serve as an impedence missmatch between how CPU page table update are +done (by CPU write to the page table and TLB flushes) from how device update +their own page table. Device update is a multi-step process, first appropriate +commands are write to a buffer, then this buffer is schedule for execution on +the device. It is only once the device has executed commands in the buffer that +the update is done. Creating and scheduling update command buffer can happen +concurrently for multiple devices. Waiting for each device to report commands +as executed is serialize (there is no point in doing this concurrently). + + +------------------------------------------------------------------------------- + +5) Represent and manage device memory from core kernel point of view + +Several differents design were try to support device memory. First one use +device specific data structure to keep information about migrated memory and +HMM hooked itself in various place of mm code to handle any access to address +that were back by device memory. It turns out that this ended up replicating +most of the fields of struct page and also needed many kernel code path to be +updated to understand this new kind of memory. + +Thing is most kernel code path never try to access the memory behind a page +but only care about struct page contents. Because of this HMM switchted to +directly using struct page for device memory which left most kernel code path +un-aware of the difference. We only need to make sure that no one ever try to +map those page from the CPU side. + +HMM provide a set of helpers to register and hotplug device memory as a new +region needing struct page. This is offer through a very simple API: + + struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, + struct device *device, + unsigned long size); + void hmm_devmem_remove(struct hmm_devmem *devmem); + +The hmm_devmem_ops is where most of the important things are: + + struct hmm_devmem_ops { + void (*free)(struct hmm_devmem *devmem, struct page *page); + int (*fault)(struct hmm_devmem *devmem, + struct vm_area_struct *vma, + unsigned long addr, + struct page *page, + unsigned flags, + pmd_t *pmdp); + }; + +The first callback (free()) happens when the last reference on a device page is +drop. This means the device page is now free and no longer use by anyone. The +second callback happens whenever CPU try to access a device page which it can +not do. This second callback must trigger a migration back to system memory. + + +------------------------------------------------------------------------------- + +6) Migrate to and from device memory + +Because CPU can not access device memory, migration must use device DMA engine +to perform copy from and to device memory. For this we need a new migration +helper: + + int migrate_vma(const struct migrate_vma_ops *ops, + struct vm_area_struct *vma, + unsigned long mentries, + unsigned long start, + unsigned long end, + unsigned long *src, + unsigned long *dst, + void *private); + +Unlike other migration function it works on a range of virtual address, there +is two reasons for that. First device DMA copy has a high setup overhead cost +and thus batching multiple pages is needed as otherwise the migration overhead +make the whole excersie pointless. The second reason is because driver trigger +such migration base on range of address the device is actively accessing. + +The migrate_vma_ops struct define two callbacks. First one (alloc_and_copy()) +control destination memory allocation and copy operation. Second one is there +to allow device driver to perform cleanup operation after migration. + + struct migrate_vma_ops { + void (*alloc_and_copy)(struct vm_area_struct *vma, + const unsigned long *src, + unsigned long *dst, + unsigned long start, + unsigned long end, + void *private); + void (*finalize_and_map)(struct vm_area_struct *vma, + const unsigned long *src, + const unsigned long *dst, + unsigned long start, + unsigned long end, + void *private); + }; + +It is important to stress that this migration helpers allow for hole in the +virtual address range. Some pages in the range might not be migrated for all +the usual reasons (page is pin, page is lock, ...). This helper does not fail +but just skip over those pages. + +The alloc_and_copy() might as well decide to not migrate all pages in the +range (for reasons under the callback control). For those the callback just +have to leave the corresponding dst entry empty. + +Finaly the migration of the struct page might fails (for file back page) for +various reasons (failure to freeze reference, or update page cache, ...). If +that happens then the finalize_and_map() can catch any pages that was not +migrated. Note those page were still copied to new page and thus we wasted +bandwidth but this is considered as a rare event and a price that we are +willing to pay to keep all the code simpler. + + +------------------------------------------------------------------------------- + +7) Memory cgroup (memcg) and rss accounting + +For now device memory is accounted as any regular page in rss counters (either +anonymous if device page is use for anonymous, file if device page is use for +file back page or shmem if device page is use for share memory). This is a +deliberate choice to keep existing application that might start using device +memory without knowing about it to keep runing unimpacted. + +Drawbacks is that OOM killer might kill an application using a lot of device +memory and not a lot of regular system memory and thus not freeing much system +memory. We want to gather more real world experience on how application and +system react under memory pressure in the presence of device memory before +deciding to account device memory differently. + + +Same decision was made for memory cgroup. Device memory page are accounted +against same memory cgroup a regular page would be accounted to. This does +simplify migration to and from device memory. This also means that migration +back from device memory to regular memory can not fail because it would +go above memory cgroup limit. We might revisit this choice latter on once we +get more experience in how device memory is use and its impact on memory +resource control. + + +Note that device memory can never be pin nor by device driver nor through GUP +and thus such memory is always free upon process exit. Or when last reference +is drop in case of share memory or file back memory. diff --git a/MAINTAINERS b/MAINTAINERS index fe7a27ed0bdb03c3969cc55ae0a85fb18a43fdb0..7f32b510fdea9b69262a31b343942c3a7af82c77 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7457,6 +7457,13 @@ S: Maintained F: tools/testing/selftests/ F: Documentation/dev-tools/kselftest* +KERNEL USERMODE HELPER +M: "Luis R. Rodriguez" +L: linux-kernel@vger.kernel.org +S: Maintained +F: kernel/umh.c +F: include/linux/umh.h + KERNEL VIRTUAL MACHINE (KVM) M: Paolo Bonzini M: Radim Krčmář @@ -7632,7 +7639,7 @@ F: include/linux/kmemleak.h F: mm/kmemleak.c F: mm/kmemleak-test.c -KMOD MODULE USERMODE HELPER +KMOD KERNEL MODULE LOADER - USERMODE HELPER M: "Luis R. Rodriguez" L: linux-kernel@vger.kernel.org S: Maintained @@ -7790,6 +7797,13 @@ M: Sasha Levin S: Maintained F: tools/lib/lockdep/ +HMM - Heterogeneous Memory Management +M: Jérôme Glisse +L: linux-mm@kvack.org +S: Maintained +F: mm/hmm* +F: include/linux/hmm* + LIBNVDIMM BLK: MMIO-APERTURE DRIVER M: Ross Zwisler L: linux-nvdimm@lists.01.org @@ -10727,8 +10741,11 @@ W: http://wiki.enneenne.com/index.php/LinuxPPS_support L: linuxpps@ml.enneenne.com (subscribers-only) S: Maintained F: Documentation/pps/ +F: Documentation/devicetree/bindings/pps/pps-gpio.txt +F: Documentation/ABI/testing/sysfs-pps F: drivers/pps/ F: include/linux/pps*.h +F: include/uapi/linux/pps.h PPTP DRIVER M: Dmitry Kozlov diff --git a/arch/alpha/include/asm/string.h b/arch/alpha/include/asm/string.h index c2911f5917041abd49dea5f14855ac9691d1aec0..9eb9933d845fb8dc82f321e1a049641a4cd4c125 100644 --- a/arch/alpha/include/asm/string.h +++ b/arch/alpha/include/asm/string.h @@ -65,13 +65,14 @@ extern void * memchr(const void *, int, size_t); aligned values. The DEST and COUNT parameters must be even for correct operation. */ -#define __HAVE_ARCH_MEMSETW -extern void * __memsetw(void *dest, unsigned short, size_t count); - -#define memsetw(s, c, n) \ -(__builtin_constant_p(c) \ - ? __constant_c_memset((s),0x0001000100010001UL*(unsigned short)(c),(n)) \ - : __memsetw((s),(c),(n))) +#define __HAVE_ARCH_MEMSET16 +extern void * __memset16(void *dest, unsigned short, size_t count); +static inline void *memset16(uint16_t *p, uint16_t v, size_t n) +{ + if (__builtin_constant_p(v)) + return __constant_c_memset(p, 0x0001000100010001UL * v, n * 2); + return __memset16(p, v, n * 2); +} #endif /* __KERNEL__ */ diff --git a/arch/alpha/include/asm/vga.h b/arch/alpha/include/asm/vga.h index c00106bac52146745a5b128e7e05b1e1480e8400..3c1c2b6128e77f41a714649bb4dbee92205f0c38 100644 --- a/arch/alpha/include/asm/vga.h +++ b/arch/alpha/include/asm/vga.h @@ -34,7 +34,7 @@ static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) if (__is_ioaddr(s)) memsetw_io((u16 __iomem *) s, c, count); else - memsetw(s, c, count); + memset16(s, c, count / 2); } /* Do not trust that the usage will be correct; analyze the arguments. */ diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S index 89a26f5e89de3db904cbfabb1ff735464d6ca3cb..f824969e9e77a61eb2750cf82c41c8da3d0e2557 100644 --- a/arch/alpha/lib/memset.S +++ b/arch/alpha/lib/memset.S @@ -20,7 +20,7 @@ .globl memset .globl __memset .globl ___memset - .globl __memsetw + .globl __memset16 .globl __constant_c_memset .ent ___memset @@ -110,8 +110,8 @@ EXPORT_SYMBOL(___memset) EXPORT_SYMBOL(__constant_c_memset) .align 5 - .ent __memsetw -__memsetw: + .ent __memset16 +__memset16: .prologue 0 inswl $17,0,$1 /* E0 */ @@ -123,8 +123,8 @@ __memsetw: or $1,$4,$17 /* E0 */ br __constant_c_memset /* .. E1 */ - .end __memsetw -EXPORT_SYMBOL(__memsetw) + .end __memset16 +EXPORT_SYMBOL(__memset16) memset = ___memset __memset = ___memset diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h index cf4f3aad0fc1c2154c6cf3839ff21bb1c46d6499..fe1c6af3a1b1a4cab35c6d125b43262d3d21fd85 100644 --- a/arch/arm/include/asm/string.h +++ b/arch/arm/include/asm/string.h @@ -24,6 +24,20 @@ extern void * memchr(const void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMSET extern void * memset(void *, int, __kernel_size_t); +#define __HAVE_ARCH_MEMSET32 +extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); +static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n) +{ + return __memset32(p, v, n * 4); +} + +#define __HAVE_ARCH_MEMSET64 +extern void *__memset64(uint64_t *, uint32_t low, __kernel_size_t, uint32_t hi); +static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) +{ + return __memset64(p, v, n * 8, v >> 32); +} + extern void __memzero(void *ptr, __kernel_size_t n); #define memset(p,v,n) \ diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 8e8d20cdbce736b251c93f3822f741893dd9824c..5266fd9ad6b41402dceafcdf1991776f39fbe57b 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -87,6 +87,8 @@ EXPORT_SYMBOL(__raw_writesl); EXPORT_SYMBOL(strchr); EXPORT_SYMBOL(strrchr); EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(__memset32); +EXPORT_SYMBOL(__memset64); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 3c65e3bd790fe1f7aec59d9568cb4c0799be0e9a..ed6d35d9cdb5a6288f70d116e6914f803455006e 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -28,7 +28,7 @@ UNWIND( .fnstart ) 1: orr r1, r1, r1, lsl #8 orr r1, r1, r1, lsl #16 mov r3, r1 - cmp r2, #16 +7: cmp r2, #16 blt 4f #if ! CALGN(1)+0 @@ -41,7 +41,7 @@ UNWIND( .fnend ) UNWIND( .fnstart ) UNWIND( .save {r8, lr} ) mov r8, r1 - mov lr, r1 + mov lr, r3 2: subs r2, r2, #64 stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. @@ -73,11 +73,11 @@ UNWIND( .fnend ) UNWIND( .fnstart ) UNWIND( .save {r4-r8, lr} ) mov r4, r1 - mov r5, r1 + mov r5, r3 mov r6, r1 - mov r7, r1 + mov r7, r3 mov r8, r1 - mov lr, r1 + mov lr, r3 cmp r2, #96 tstgt ip, #31 @@ -114,7 +114,7 @@ UNWIND( .fnstart ) tst r2, #4 strne r1, [ip], #4 /* - * When we get here, we've got less than 4 bytes to zero. We + * When we get here, we've got less than 4 bytes to set. We * may have an unaligned pointer as well. */ 5: tst r2, #2 @@ -135,3 +135,15 @@ UNWIND( .fnstart ) UNWIND( .fnend ) ENDPROC(memset) ENDPROC(mmioset) + +ENTRY(__memset32) +UNWIND( .fnstart ) + mov r3, r1 @ copy r1 to r3 and fall into memset64 +UNWIND( .fnend ) +ENDPROC(__memset32) +ENTRY(__memset64) +UNWIND( .fnstart ) + mov ip, r0 @ preserve r0 as return value + b 7b @ jump into the middle of memset +UNWIND( .fnend ) +ENDPROC(__memset64) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ffe089942ac4d6e995e67c32fecbfcba27576949..9f7195a5773ee66138bc5170508f8c251f501e47 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -690,7 +690,7 @@ void __init smp_init_cpus(void) acpi_parse_gic_cpu_interface, 0); if (cpu_count > nr_cpu_ids) - pr_warn("Number of cores (%d) exceeds configured maximum of %d - clipping\n", + pr_warn("Number of cores (%d) exceeds configured maximum of %u - clipping\n", cpu_count, nr_cpu_ids); if (!bootcpu_valid) { diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index eefd9a4ed156753b6f07cb41cd0cb97ca0f99a48..1cce8243449e03da1855e39c180e9a459bb3ccc7 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -17,6 +17,9 @@ config FRV select HAVE_DEBUG_STACKOVERFLOW select ARCH_NO_COHERENT_DMA_MMAP +config CPU_BIG_ENDIAN + def_bool y + config ZONE_DMA bool default y diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index 6e3d36f37a02f146424c4ba0b87ae8ab153a0a1c..3089f7fe2abdb2aafa7bb613c45319720ea77a4f 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -23,6 +23,9 @@ config H8300 select HAVE_ARCH_HASH select CPU_NO_EFFICIENT_FFS +config CPU_BIG_ENDIAN + def_bool y + config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/m32r/configs/m32104ut_defconfig b/arch/m32r/configs/m32104ut_defconfig index be30e094db71ffed9db89ff126d23729d2c6e377..4aa42acbd5129a5b0f85d9062d0fd05b39d173a0 100644 --- a/arch/m32r/configs/m32104ut_defconfig +++ b/arch/m32r/configs/m32104ut_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -40,7 +39,6 @@ CONFIG_NETFILTER_XT_MATCH_REALM=m CONFIG_NETFILTER_XT_MATCH_SCTP=m CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_ECN=m @@ -48,7 +46,6 @@ CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m @@ -106,7 +103,6 @@ CONFIG_SENSORS_SMSC47M1=m CONFIG_SENSORS_W83781D=m CONFIG_SENSORS_W83L785TS=m CONFIG_SENSORS_W83627HF=m -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y diff --git a/arch/m32r/configs/m32700ut.smp_defconfig b/arch/m32r/configs/m32700ut.smp_defconfig index a3d727ed6a1609689c1129973199d64755546ac9..41a0495b65df4d44a2b90a1a69974d38796aa454 100644 --- a/arch/m32r/configs/m32700ut.smp_defconfig +++ b/arch/m32r/configs/m32700ut.smp_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y @@ -30,7 +29,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=m @@ -63,7 +61,6 @@ CONFIG_SERIAL_M32R_SIO_CONSOLE=y CONFIG_SERIAL_M32R_PLDSIO=y CONFIG_HW_RANDOM=y CONFIG_DS1302=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_S1D13XXX=y diff --git a/arch/m32r/configs/m32700ut.up_defconfig b/arch/m32r/configs/m32700ut.up_defconfig index b8334163099defd3aa74fa17ed7ee042da5ae74f..20078a866f45eca7775a563e38d15ae9b5b5e46d 100644 --- a/arch/m32r/configs/m32700ut.up_defconfig +++ b/arch/m32r/configs/m32700ut.up_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y @@ -29,7 +28,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=m @@ -62,7 +60,6 @@ CONFIG_SERIAL_M32R_SIO_CONSOLE=y CONFIG_SERIAL_M32R_PLDSIO=y CONFIG_HW_RANDOM=y CONFIG_DS1302=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_S1D13XXX=y diff --git a/arch/m32r/configs/mappi.nommu_defconfig b/arch/m32r/configs/mappi.nommu_defconfig index 7c90ce2fc42b77713d7f34e0c9f3b481d8d0a468..4bf3820e054a899565424270e8e974f7131a14b2 100644 --- a/arch/m32r/configs/mappi.nommu_defconfig +++ b/arch/m32r/configs/mappi.nommu_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_LOG_BUF_SHIFT=14 @@ -39,7 +38,6 @@ CONFIG_NETDEVICES=y # CONFIG_VT is not set CONFIG_SERIAL_M32R_SIO_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_NFS_FS=y diff --git a/arch/m32r/configs/mappi.smp_defconfig b/arch/m32r/configs/mappi.smp_defconfig index 367d07cebcd373786d9f3374d56e852235e8a2a8..f9ed7bdbf4ded00481fb0b6cfdd10ab717e3bb44 100644 --- a/arch/m32r/configs/mappi.smp_defconfig +++ b/arch/m32r/configs/mappi.smp_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -31,9 +30,7 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_IPV6 is not set # CONFIG_STANDALONE is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m @@ -50,7 +47,6 @@ CONFIG_NETDEVICES=y # CONFIG_VT is not set CONFIG_SERIAL_M32R_SIO_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=y diff --git a/arch/m32r/configs/mappi.up_defconfig b/arch/m32r/configs/mappi.up_defconfig index cb11384386ce194d72dcdfdae3f4c28d1d26cd6b..289ae7421e1214e3bc41cfd051d228046c15ab8d 100644 --- a/arch/m32r/configs/mappi.up_defconfig +++ b/arch/m32r/configs/mappi.up_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -29,9 +28,7 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_IPV6 is not set # CONFIG_STANDALONE is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m @@ -48,7 +45,6 @@ CONFIG_NETDEVICES=y # CONFIG_VT is not set CONFIG_SERIAL_M32R_SIO_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=y diff --git a/arch/m32r/configs/mappi2.opsp_defconfig b/arch/m32r/configs/mappi2.opsp_defconfig index 3bff779259b485179e52717d37dda19eb205fa2f..2852f6e7e246a66d13c1ced60a345c5aa1e67467 100644 --- a/arch/m32r/configs/mappi2.opsp_defconfig +++ b/arch/m32r/configs/mappi2.opsp_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y @@ -50,7 +49,6 @@ CONFIG_SMC91X=y # CONFIG_SERIO_I8042 is not set CONFIG_SERIAL_M32R_SIO_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_VGA_CONSOLE is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y diff --git a/arch/m32r/configs/mappi2.vdec2_defconfig b/arch/m32r/configs/mappi2.vdec2_defconfig index 75246c9c1af82e9082baef90eabddc87e8e54b98..8da4dbad8510c55c9e8addc7656d6366bd1c8959 100644 --- a/arch/m32r/configs/mappi2.vdec2_defconfig +++ b/arch/m32r/configs/mappi2.vdec2_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y @@ -49,7 +48,6 @@ CONFIG_SMC91X=y # CONFIG_SERIO_I8042 is not set CONFIG_SERIAL_M32R_SIO_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_VGA_CONSOLE is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y diff --git a/arch/m32r/configs/mappi3.smp_defconfig b/arch/m32r/configs/mappi3.smp_defconfig index 27cefd41ac1fd5ab1c7dd8e8829bf1935fc3bd8e..5605b23e2faf8ffc2751e8436bfe2d15fb334e5f 100644 --- a/arch/m32r/configs/mappi3.smp_defconfig +++ b/arch/m32r/configs/mappi3.smp_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -29,9 +28,7 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m @@ -50,7 +47,6 @@ CONFIG_SMC91X=y # CONFIG_VT is not set CONFIG_SERIAL_M32R_SIO_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=y diff --git a/arch/m32r/configs/oaks32r_defconfig b/arch/m32r/configs/oaks32r_defconfig index 5087a510ca4fcfaefe8df4aa3eb46ed328312275..5ccab127f6ad828ebeaa1a2bae04ae6140f00083 100644 --- a/arch/m32r/configs/oaks32r_defconfig +++ b/arch/m32r/configs/oaks32r_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set @@ -37,7 +36,6 @@ CONFIG_NETDEVICES=y # CONFIG_VT is not set CONFIG_SERIAL_M32R_SIO_CONSOLE=y CONFIG_HW_RANDOM=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y diff --git a/arch/m32r/configs/opsput_defconfig b/arch/m32r/configs/opsput_defconfig index 50c6f525db209ff01da71aae187ae3b9f6683aee..3ce1d08355e536dc979b6897e2b17f61fe65aa92 100644 --- a/arch/m32r/configs/opsput_defconfig +++ b/arch/m32r/configs/opsput_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y @@ -46,7 +45,6 @@ CONFIG_SERIAL_M32R_SIO_CONSOLE=y CONFIG_SERIAL_M32R_PLDSIO=y CONFIG_HW_RANDOM=y CONFIG_DS1302=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/m32r/configs/usrv_defconfig b/arch/m32r/configs/usrv_defconfig index a3cfaaedab609d70e7c3facf7c97d4f4787d7a6c..cb8c051c3d466c788dad63b988b1d1302b6e2386 100644 --- a/arch/m32r/configs/usrv_defconfig +++ b/arch/m32r/configs/usrv_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y @@ -34,9 +33,6 @@ CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -62,7 +58,6 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_M32R_SIO is not set # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 5abb548f0e7075320e339671bbb4cbae65a8d2cd..353d90487c2b9e74bb56e3b35205ae74901b6325 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -24,6 +24,9 @@ config M68K select OLD_SIGSUSPEND3 select OLD_SIGACTION +config CPU_BIG_ENDIAN + def_bool y + config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 4ed8ebf33509e5c5d843e1badf3c548c4bac6420..9d26abdf0dc1c2349e5822b01416f0f0cd154b58 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -36,6 +36,22 @@ config MICROBLAZE select VIRT_TO_BUS select CPU_NO_EFFICIENT_FFS +# Endianness selection +choice + prompt "Endianness selection" + default CPU_BIG_ENDIAN + help + microblaze architectures can be configured for either little or + big endian formats. Be sure to select the appropriate mode. + +config CPU_BIG_ENDIAN + bool "Big endian" + +config CPU_LITTLE_ENDIAN + bool "Little endian" + +endchoice + config SWAP def_bool n diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index 740f2b82a182a9761b79028d38bde746ba7131c3..1f6c486826a0ee19880b8e7c6eb97f2a267f1bb5 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -35,6 +35,8 @@ endif CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_DIV) += -mno-xl-soft-div CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_BARREL) += -mxl-barrel-shift CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare +CPUFLAGS-$(CONFIG_BIG_ENDIAN) += -mbig-endian +CPUFLAGS-$(CONFIG_LITTLE_ENDIAN) += -mlittle-endian CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER)) diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h index f82c83749a089bf26d5a06846ebd73edcc4130a8..975ff51f80c4d55868f9296c19ce2ef64ceab622 100644 --- a/arch/mips/include/asm/vga.h +++ b/arch/mips/include/asm/vga.h @@ -6,6 +6,7 @@ #ifndef _ASM_VGA_H #define _ASM_VGA_H +#include #include #include @@ -40,9 +41,15 @@ static inline u16 scr_readw(volatile const u16 *addr) return le16_to_cpu(*addr); } +static inline void scr_memsetw(u16 *s, u16 v, unsigned int count) +{ + memset16(s, cpu_to_le16(v), count / 2); +} + #define scr_memcpyw(d, s, c) memcpy(d, s, c) #define scr_memmovew(d, s, c) memmove(d, s, c) #define VT_BUF_HAVE_MEMCPYW #define VT_BUF_HAVE_MEMMOVEW +#define VT_BUF_HAVE_MEMSETW #endif /* _ASM_VGA_H */ diff --git a/arch/mn10300/configs/asb2303_defconfig b/arch/mn10300/configs/asb2303_defconfig index 1fd41ec1dfb54396846832e9f42b904f65b97d68..d06dae131139c3785fcd113b78a4ba3c45e83b82 100644 --- a/arch/mn10300/configs/asb2303_defconfig +++ b/arch/mn10300/configs/asb2303_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_TINY_RCU=y @@ -28,16 +27,13 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_MTD=y CONFIG_MTD_DEBUG=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED=y -CONFIG_MTD_CHAR=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -48,8 +44,6 @@ CONFIG_MTD_PHYSMAP=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SMC91X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set diff --git a/arch/mn10300/configs/asb2364_defconfig b/arch/mn10300/configs/asb2364_defconfig index cd0a6cb17deebd988a2871607d19c423a430b233..b1d80cee97eef96d8a793703686c245f55eb8ec6 100644 --- a/arch/mn10300/configs/asb2364_defconfig +++ b/arch/mn10300/configs/asb2364_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y @@ -40,7 +39,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set CONFIG_IPV6=y # CONFIG_INET6_XFRM_MODE_TRANSPORT is not set @@ -50,10 +48,8 @@ CONFIG_IPV6=y CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_DEBUG=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED=y -CONFIG_MTD_CHAR=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -64,8 +60,6 @@ CONFIG_MTD_PHYSMAP=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SMSC911X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -77,7 +71,6 @@ CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_SHARE_IRQ=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_PROC_KCORE=y # CONFIG_PROC_PAGE_MONITOR is not set @@ -93,4 +86,3 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 1e95920b07377417415c8622201d8e04bfd1f1a7..a0f2e4a323c1d543b46f8de29c90ab3d90519b72 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -29,6 +29,9 @@ config OPENRISC select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1 select NO_BOOTMEM +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 13648519bd415e938368d9b6b8ae55e65fce2ff5..ba7b7ddc38442dab7e9b0679bba03478bf20096c 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -59,6 +59,9 @@ config PARISC config CPU_BIG_ENDIAN def_bool y +config CPU_BIG_ENDIAN + def_bool y + config MMU def_bool y diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h index ab3acd2f2786c61f721e031cc79f51ffa457605a..7a7b541b74930440372fcea8c87afeb3698eb0c2 100644 --- a/arch/powerpc/include/asm/vga.h +++ b/arch/powerpc/include/asm/vga.h @@ -33,8 +33,16 @@ static inline u16 scr_readw(volatile const u16 *addr) return le16_to_cpu(*addr); } +#define VT_BUF_HAVE_MEMSETW +static inline void scr_memsetw(u16 *s, u16 v, unsigned int n) +{ + memset16(s, cpu_to_le16(v), n / 2); +} + #define VT_BUF_HAVE_MEMCPYW +#define VT_BUF_HAVE_MEMMOVEW #define scr_memcpyw memcpy +#define scr_memmovew memmove #endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 70f073d6c3b27c5cac8f8c8492da879f51f80622..2ff2b8a19f712d8b14c958b86ca947bdc50169cb 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -224,7 +224,7 @@ void __init allocate_pacas(void) paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit)); memset(paca, 0, paca_size); - printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", + printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n", paca_size, nr_cpu_ids, paca); allocate_lppacas(nr_cpu_ids, limit); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 7de73589d8e24274391de8b60b15bdaa4d703de5..0ac741fae90ea5c1cffc227323fe40cbb5319188 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -551,7 +551,7 @@ void __init smp_setup_cpu_maps(void) if (maxcpus > nr_cpu_ids) { printk(KERN_WARNING "Partition configured for %d cpus, " - "operating system maximum is %d.\n", + "operating system maximum is %u.\n", maxcpus, nr_cpu_ids); maxcpus = nr_cpu_ids; } else diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 44f3a25ca630a61df697639155fce0214c09a8d3..ebc244b08d6748512c19199446d25f7ac49fca9b 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -511,13 +511,13 @@ static bool xive_parse_provisioning(struct device_node *np) static void xive_native_setup_pools(void) { /* Allocate a pool big enough */ - pr_debug("XIVE: Allocating VP block for pool size %d\n", nr_cpu_ids); + pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids); xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids); if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP)) pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n"); - pr_debug("XIVE: Pool VPs allocated at 0x%x for %d max CPUs\n", + pr_debug("XIVE: Pool VPs allocated at 0x%x for %u max CPUs\n", xive_pool_vps, nr_cpu_ids); } diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index e5335123b5e99c098f15d2a284e1d28c605277c3..72b72e50a92edf3feb2bb09c84d8b572a9344e00 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y @@ -28,14 +27,10 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -51,8 +46,6 @@ CONFIG_NETDEVICES=y CONFIG_SMSC_PHY=y CONFIG_NET_ETHERNET=y CONFIG_SMSC911X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -82,7 +75,6 @@ CONFIG_FB=y CONFIG_FB_SH_MOBILE_LCDC=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_MMC=y CONFIG_MMC_SPI=y @@ -110,8 +102,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig index 6cb327977d1379a806ff7590bcee1ae6bd51a71b..4710df43a5b54f78862f454ab267bde3eb56ed13 100644 --- a/arch/sh/configs/apsh4a3a_defconfig +++ b/arch/sh/configs/apsh4a3a_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y @@ -28,15 +27,11 @@ CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -46,8 +41,6 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SMSC911X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -66,7 +59,6 @@ CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y CONFIG_LOGO=y -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y @@ -96,7 +88,6 @@ CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_PREEMPT is not set # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_FTRACE is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index fe45d2c9b15121ab75fc7291a56a13ed9ea3e874..825c641726c43193ea709b9f2c53f878f70fbaaa 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y @@ -53,7 +52,6 @@ CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_NET_KEY=y CONFIG_INET=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -70,8 +68,6 @@ CONFIG_NETDEVICES=y CONFIG_MDIO_BITBANG=y CONFIG_NET_ETHERNET=y CONFIG_SMSC911X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set @@ -83,7 +79,6 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FB_SH7785FB=y CONFIG_FRAMEBUFFER_CONSOLE=y @@ -124,6 +119,5 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_VM=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_DWARF_UNWINDER=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/cayman_defconfig b/arch/sh/configs/cayman_defconfig index 67e150631ea514b72dc14e1b0f03b46ef11d550e..5a90e24aa8a6679dc61e1c2fde23fd60a7be12f8 100644 --- a/arch/sh/configs/cayman_defconfig +++ b/arch/sh/configs/cayman_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set @@ -19,7 +18,6 @@ CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_PNP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -38,7 +36,6 @@ CONFIG_NET_ETHERNET=y CONFIG_HW_RANDOM=y CONFIG_I2C=m CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_MODE_HELPERS=y @@ -67,5 +64,4 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_SCHEDSTATS=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig index ec243ca295292f2a2da487735551048996b741d1..3f08dc54480b8f9be93e4d24e645a05e0fac3930 100644 --- a/arch/sh/configs/dreamcast_defconfig +++ b/arch/sh/configs/dreamcast_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 @@ -32,7 +31,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set @@ -43,8 +41,6 @@ CONFIG_NET_ETHERNET=y CONFIG_NET_PCI=y CONFIG_8139TOO=y # CONFIG_8139TOO_PIO is not set -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_KEYBOARD_ATKBD is not set CONFIG_KEYBOARD_MAPLE=y # CONFIG_MOUSE_PS2 is not set @@ -56,7 +52,6 @@ CONFIG_HW_RANDOM=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_SH_WDT=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_PVR2=y @@ -74,5 +69,4 @@ CONFIG_LOGO=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/ecovec24-romimage_defconfig b/arch/sh/configs/ecovec24-romimage_defconfig index 5fcb17bff24a3d8ec19e023e647045ee554aa098..0c5dfccbfe37d1805d00028f1c8654abaab06c68 100644 --- a/arch/sh/configs/ecovec24-romimage_defconfig +++ b/arch/sh/configs/ecovec24-romimage_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y @@ -26,19 +25,15 @@ CONFIG_INET=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_MISC_DEVICES is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SH_ETH=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -51,7 +46,6 @@ CONFIG_I2C=y CONFIG_I2C_SH_MOBILE=y CONFIG_GPIO_SYSFS=y # CONFIG_HWMON is not set -# CONFIG_HID_SUPPORT is not set CONFIG_USB=y CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y @@ -64,4 +58,3 @@ CONFIG_TMPFS=y # CONFIG_NETWORK_FILESYSTEMS is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index 0b364e3b0ff8206a9f40aabfc9febdb7227b78ff..3568310c2c2fc120434deca34a8dc9962e2b34f8 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y @@ -29,16 +28,12 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_IRDA=y CONFIG_SH_SIR=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -53,8 +48,6 @@ CONFIG_NETDEVICES=y CONFIG_SMSC_PHY=y CONFIG_NET_ETHERNET=y CONFIG_SH_ETH=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set @@ -140,8 +133,6 @@ CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig index 41fa3a7eed961432c6f11121843ce90a0838087e..db756e0990527d3f950ef233febc8a0b5772b993 100644 --- a/arch/sh/configs/edosk7705_defconfig +++ b/arch/sh/configs/edosk7705_defconfig @@ -20,7 +20,6 @@ CONFIG_CPU_SUBTYPE_SH7705=y CONFIG_SH_EDOSK7705=y CONFIG_SH_PCLK_FREQ=31250000 # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_MISC_DEVICES is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -35,5 +34,4 @@ CONFIG_SH_PCLK_FREQ=31250000 # CONFIG_SYSFS is not set # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRC32 is not set diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index e1077a041ac3a453ea6381ce573f2d508144118f..aab4ff1e247c898b29e78d206d51dc0d46fb0b34 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_LOCALVERSION="_edosk7760" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -31,7 +30,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -39,10 +37,7 @@ CONFIG_DEBUG_DRIVER=y CONFIG_DEBUG_DEVRES=y CONFIG_MTD=y CONFIG_MTD_DEBUG=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -62,12 +57,9 @@ CONFIG_MTD_ABSENT=y CONFIG_MTD_PHYSMAP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=26000 -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SMC91X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -92,7 +84,6 @@ CONFIG_SND=y # CONFIG_SND_VERBOSE_PROCFS is not set CONFIG_SND_VERBOSE_PRINTK=y CONFIG_SND_SOC=y -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y @@ -119,8 +110,6 @@ CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_TIMER_STATS=y CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_DES=y diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index 67cb1094a03333dfc84a2f2214d75bfc6450e342..2985fe7c6d5009a0afb99aac741e767520b0098b 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -26,13 +25,10 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -43,14 +39,11 @@ CONFIG_MTD_CFI_GEOMETRY=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y -# CONFIG_MISC_DEVICES is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SH_ETH=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -65,7 +58,6 @@ CONFIG_FB_FOREIGN_ENDIAN=y CONFIG_FB_SH7760=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y -# CONFIG_HID_SUPPORT is not set CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y @@ -73,7 +65,6 @@ CONFIG_USB_STORAGE=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_AUTOFS_FS=y CONFIG_AUTOFS4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y @@ -123,6 +114,5 @@ CONFIG_NLS_UTF8=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig index 496edcdf95a3e8f246113757a99b791dbd15cdd2..4dcf7f5525829abe3c7b5751c91370fc9fb5f269 100644 --- a/arch/sh/configs/hp6xx_defconfig +++ b/arch/sh/configs/hp6xx_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -37,7 +36,6 @@ CONFIG_SERIAL_SH_SCI_NR_UARTS=3 CONFIG_SERIAL_SH_SCI_CONSOLE=y CONFIG_LEGACY_PTY_COUNT=64 # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_HIT=y @@ -46,7 +44,6 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FONTS=y CONFIG_FONT_PEARL_8x8=y -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y @@ -55,7 +52,6 @@ CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_NLS_CODEPAGE_850=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y diff --git a/arch/sh/configs/kfr2r09-romimage_defconfig b/arch/sh/configs/kfr2r09-romimage_defconfig index 029a506ca3258da0bb7bd957548859061cbe6487..9cc37f29e3b4b6ec1f3bfea6dde07cae3bcd5d9c 100644 --- a/arch/sh/configs/kfr2r09-romimage_defconfig +++ b/arch/sh/configs/kfr2r09-romimage_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y @@ -26,12 +25,10 @@ CONFIG_INET=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_MISC_DEVICES is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -44,7 +41,6 @@ CONFIG_I2C=y CONFIG_I2C_SH_MOBILE=y CONFIG_GPIO_SYSFS=y # CONFIG_HWMON is not set -# CONFIG_HID_SUPPORT is not set CONFIG_USB_GADGET=y CONFIG_USB_CDC_COMPOSITE=y # CONFIG_DNOTIFY is not set @@ -55,5 +51,4 @@ CONFIG_TMPFS=y # CONFIG_NETWORK_FILESYSTEMS is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRC32 is not set diff --git a/arch/sh/configs/kfr2r09_defconfig b/arch/sh/configs/kfr2r09_defconfig index fac13ded07b2bd71c6bbac7e0e48dc28bf86112d..46693d0336446ccbc0c0e34ad1fc05fd17b69ccb 100644 --- a/arch/sh/configs/kfr2r09_defconfig +++ b/arch/sh/configs/kfr2r09_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y @@ -33,15 +32,12 @@ CONFIG_INET=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -49,7 +45,6 @@ CONFIG_MTD_PHYSMAP=y CONFIG_MTD_ONENAND=y CONFIG_MTD_ONENAND_GENERIC=y CONFIG_MTD_UBI=y -# CONFIG_MISC_DEVICES is not set # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set @@ -77,7 +72,6 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_CLUT224 is not set # CONFIG_LOGO_SUPERH_MONO is not set # CONFIG_LOGO_SUPERH_CLUT224 is not set -# CONFIG_HID_SUPPORT is not set CONFIG_USB_GADGET=y CONFIG_USB_CDC_COMPOSITE=m CONFIG_MMC=y @@ -91,4 +85,3 @@ CONFIG_TMPFS=y # CONFIG_NETWORK_FILESYSTEMS is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index 6783f31315c7eeda3861fbb2a286964fc159c0fd..467f4d2d8e87f4a9fd63f7f15dc54b8cf82555ed 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_SYSCTL_SYSCALL is not set @@ -24,10 +23,8 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y -CONFIG_IP_NF_QUEUE=m CONFIG_ATALK=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=y @@ -118,7 +115,6 @@ CONFIG_NFSD_V3=y CONFIG_SMB_FS=m CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SH_STANDARD_BIOS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index e3c0894b1bb4abc5a55eeaabfa74caa369d0f675..9e3edfdf9b2e3ac43f9b5e5bf9e837d68f4b8600 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_SYSCTL_SYSCALL is not set @@ -28,7 +27,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -61,7 +59,6 @@ CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_ROMFS_FS=y CONFIG_NLS_CODEPAGE_437=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SH_STANDARD_BIOS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index 9479872b1ae6b0332a97b6d5d53adb93831d6ee6..fb7415dbc102ed08a2ae72d5443963968466b48a 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -35,16 +34,13 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -55,8 +51,6 @@ CONFIG_NETDEVICES=y CONFIG_SMSC_PHY=y CONFIG_NET_ETHERNET=y CONFIG_SMSC911X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_INPUT_EVDEV=y # CONFIG_MOUSE_PS2 is not set CONFIG_SERIAL_8250=y @@ -68,7 +62,6 @@ CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y # CONFIG_RTC_HCTOSYS is not set @@ -96,7 +89,5 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_KOBJECT=y CONFIG_DEBUG_INFO=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRC_CCITT=m CONFIG_CRC16=m diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig index f1d2e1b5ee419eaad55cadab5f230c6ace448c92..c3f7d5899922259473a9530a71b6112ab9523d93 100644 --- a/arch/sh/configs/microdev_defconfig +++ b/arch/sh/configs/microdev_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y @@ -19,7 +18,6 @@ CONFIG_SUPERHYWAY=y CONFIG_NET=y CONFIG_INET=y CONFIG_IP_PNP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -45,6 +43,5 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_CRYPTO_ECB=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index cc61eda44922ed131baf608f25695139fc2efb93..e04f21be0756dc5ec3ea8c472489fa993fa01ebc 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -26,15 +25,11 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -47,8 +42,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SMC91X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set @@ -101,7 +94,6 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_CRYPTO_MANAGER=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/sh/configs/polaris_defconfig b/arch/sh/configs/polaris_defconfig index f3d5d9f76310612d42a69c457835875b57f4e81b..0a432b5f50e74c7df47f4d45a654207b592fd4f8 100644 --- a/arch/sh/configs/polaris_defconfig +++ b/arch/sh/configs/polaris_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y @@ -37,14 +36,11 @@ CONFIG_IP_MULTICAST=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -57,8 +53,6 @@ CONFIG_NETDEVICES=y CONFIG_SMSC_PHY=y CONFIG_NET_ETHERNET=y CONFIG_SMSC911X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -71,7 +65,6 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y @@ -91,5 +84,3 @@ CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_SG=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index 920b8471ceb766ff35ed309bd2e5435e860822e1..435bcd66c667e491058cc13590af79d4d71311a9 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y @@ -35,13 +34,11 @@ CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_BRIDGE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_COMPLEX_MAPPINGS=y @@ -110,7 +107,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_PREEMPT is not set CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index c77da6be06b8975ed68a992bb96ae6fab3bc9d24..5877e6d1f285d011c32656f9ce92ec7f63b185a9 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y @@ -42,7 +41,6 @@ CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_BRIDGE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -104,7 +102,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SH_STANDARD_BIOS=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_4KSTACKS=y diff --git a/arch/sh/configs/rsk7201_defconfig b/arch/sh/configs/rsk7201_defconfig index 5df916d931c5eb6d0a6ab4f29e2c0d2be43895e0..b195bc01e406f6030fa20b3f52f5b6c1e7f31a14 100644 --- a/arch/sh/configs/rsk7201_defconfig +++ b/arch/sh/configs/rsk7201_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y @@ -37,10 +36,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -58,8 +54,6 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y -CONFIG_VIDEO_OUTPUT_CONTROL=y -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SH=y @@ -71,5 +65,3 @@ CONFIG_ROMFS_FS=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig index 3c4f6f4d52b0df695c1b36154e13546aacec189f..8c471959bbc7bfd501e18088e80b37a5792eb1f6 100644 --- a/arch/sh/configs/rsk7203_defconfig +++ b/arch/sh/configs/rsk7203_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -44,7 +43,6 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -52,10 +50,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -64,8 +59,6 @@ CONFIG_NETDEVICES=y CONFIG_SMSC_PHY=y CONFIG_NET_ETHERNET=y CONFIG_SMSC911X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -81,7 +74,6 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_HWMON is not set CONFIG_THERMAL=y CONFIG_REGULATOR=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y @@ -130,6 +122,4 @@ CONFIG_DEBUG_VM=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_DEBUG_STACK_USAGE=y diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig index eecdf65bb7892e56b7404b8055409ba75dac03ea..2b9b731fc86b4a0401605f270a814b28b170b15f 100644 --- a/arch/sh/configs/rsk7264_defconfig +++ b/arch/sh/configs/rsk7264_defconfig @@ -35,7 +35,6 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -61,11 +60,9 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_HWMON is not set CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y -CONFIG_USB_LIBUSUAL=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig index 8370b10df35721e02c01adcc98ddae4747a1129a..d041f7bcb84c34a11763bff85319bf078bfa1b89 100644 --- a/arch/sh/configs/rsk7269_defconfig +++ b/arch/sh/configs/rsk7269_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -44,11 +43,9 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_HWMON is not set CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_STORAGE_DEBUG=y -CONFIG_USB_LIBUSUAL=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set @@ -60,5 +57,4 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_FTRACE is not set diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig index a3d081095ce23a293dedf7e4aeee44a27e67e90a..379d673f5ce8492f9757712d6ef56f4a81101d7e 100644 --- a/arch/sh/configs/rts7751r2d1_defconfig +++ b/arch/sh/configs/rts7751r2d1_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set @@ -22,7 +21,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m @@ -48,7 +46,6 @@ CONFIG_HW_RANDOM=y CONFIG_SPI=y CONFIG_SPI_SH_SCI=y CONFIG_MFD_SM501=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FB_SH_MOBILE_LCDC=m CONFIG_FB_SM501=y @@ -83,7 +80,6 @@ CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y -CONFIG_USB_LIBUSUAL=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_R9701=y CONFIG_EXT2_FS=y @@ -94,6 +90,5 @@ CONFIG_TMPFS=y CONFIG_MINIX_FS=y CONFIG_NLS_CODEPAGE_932=y CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig index b1a04f3c598bb1088bf2f363c0a84ab8f431c909..11177bceda834726c5bdb0518962a535e289d1bc 100644 --- a/arch/sh/configs/rts7751r2dplus_defconfig +++ b/arch/sh/configs/rts7751r2dplus_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set @@ -22,15 +21,11 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y @@ -56,7 +51,6 @@ CONFIG_HW_RANDOM=y CONFIG_SPI=y CONFIG_SPI_SH_SCI=y CONFIG_MFD_SM501=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FB_SH_MOBILE_LCDC=m CONFIG_FB_SM501=y @@ -91,7 +85,6 @@ CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=y -CONFIG_USB_LIBUSUAL=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_R9701=y CONFIG_EXT2_FS=y @@ -102,6 +95,5 @@ CONFIG_TMPFS=y CONFIG_MINIX_FS=y CONFIG_NLS_CODEPAGE_932=y CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index bbd4c2298708f8202433c4da43ffe4522813e941..95e5208b82600b38cb2d04d14dcdb1023996560b 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_LOCALVERSION="_SDK7780" CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -39,7 +38,6 @@ CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set CONFIG_IPV6=y # CONFIG_INET6_XFRM_MODE_BEET is not set CONFIG_NET_SCHED=y @@ -47,7 +45,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_PARPORT=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_IDE=y CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_PLATFORM=y @@ -63,8 +60,6 @@ CONFIG_BLK_DEV_DM=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SMC91X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_NETCONSOLE=y CONFIG_INPUT_FF_MEMLESS=m CONFIG_INPUT_EVDEV=y @@ -78,7 +73,6 @@ CONFIG_SSB=y CONFIG_SSB_DRIVER_PCICORE=y CONFIG_FB=y CONFIG_FB_SH_MOBILE_LCDC=m -CONFIG_DISPLAY_SUPPORT=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_LOGO=y @@ -101,7 +95,6 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -144,8 +137,6 @@ CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set CONFIG_TIMER_STATS=y CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_SH_STANDARD_BIOS=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_DES=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 36642ec2cb9799aabd2c90f294ebf970cd949825..e9ee0c878ead383e686be75ed510630a61545f9f 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_KERNEL_LZO=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -90,13 +89,11 @@ CONFIG_NET_KEY=y CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y CONFIG_FTL=y @@ -119,7 +116,6 @@ CONFIG_MTD_UBI_GLUEBI=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_IDE=y CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_PLATFORM=y @@ -140,8 +136,6 @@ CONFIG_MDIO_BITBANG=y CONFIG_NET_ETHERNET=y CONFIG_SMC91X=y CONFIG_SMSC911X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_SERIAL_SH_SCI=y @@ -157,7 +151,6 @@ CONFIG_SPI=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_SH_WDT=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y @@ -223,9 +216,7 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_MEMORY_INIT=y -# CONFIG_RCU_CPU_STALL_VERBOSE is not set CONFIG_LATENCYTOP=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_FUNCTION_TRACER=y # CONFIG_FUNCTION_GRAPH_TRACER is not set CONFIG_DMA_API_DEBUG=y diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 91853a67ec34ace092f610591048952ae94a91e6..3553acd5edb1772ad78cf14538e24c759392a56c 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y @@ -57,7 +56,6 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -65,9 +63,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -78,8 +73,6 @@ CONFIG_EEPROM_93CX6=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SMC91X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -109,7 +102,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_LIST=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_DEBUG_STACK_USAGE=y CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index 201acb4652f71a773c975578a7913009c60a8ef5..fc77a67b16e7ff50a11b77ebc9a7cd80e7e14a08 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -27,26 +26,19 @@ CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_RAM=y CONFIG_MTD_PHYSMAP=y -# CONFIG_MISC_DEVICES is not set CONFIG_SCSI=y CONFIG_SCSI_MULTI_LUN=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_USB_USBNET=y # CONFIG_USB_NET_AX8817X is not set CONFIG_USB_NET_DM9601=y @@ -104,5 +96,4 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_NFSD=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig index 9a9ad9adf9597a516016b41e53250bb91b1678f7..f54722dbc8f52c8cfbea55179eb29d52c4353d94 100644 --- a/arch/sh/configs/se7619_defconfig +++ b/arch/sh/configs/se7619_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_UID16 is not set @@ -24,10 +23,7 @@ CONFIG_BINFMT_ZFLAT=y # CONFIG_STANDALONE is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -48,4 +44,3 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_SYSFS is not set CONFIG_ROMFS_FS=y # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig index 044e0844fda18ede97b143ea366e5f7afa0069d8..ddfc6984195569c10b9ecaf2a825a299cdf5b248 100644 --- a/arch/sh/configs/se7705_defconfig +++ b/arch/sh/configs/se7705_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y @@ -27,11 +26,8 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -58,5 +54,4 @@ CONFIG_PROC_KCORE=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 1248635e4f888a4a248f4efa7236243fb4ee0deb..5a1097641247893cd0f373d308462b5a8b865ffb 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y @@ -47,7 +46,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_NET_SCHED=y @@ -68,9 +66,6 @@ CONFIG_NET_CLS_FW=y CONFIG_NET_CLS_IND=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -104,8 +99,6 @@ CONFIG_ROOT_NFS=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index c3ba6e8a981810542ecd702cf8ca2d9240496d74..9c0ef13bee10d3ac5e53a9503fc77fe4c9925c59 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y @@ -46,7 +45,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_NET_SCHED=y @@ -67,9 +65,6 @@ CONFIG_NET_CLS_FW=y CONFIG_NET_CLS_IND=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -132,6 +127,5 @@ CONFIG_NLS_ISO8859_1=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_CCITT=y diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig index ae998c7e2ee0f5aa59a4e524ed2f6ba06305d78a..ccc7fc423fde7fe3f5b5185eae1bbbf23034e224 100644 --- a/arch/sh/configs/se7722_defconfig +++ b/arch/sh/configs/se7722_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y @@ -26,7 +25,6 @@ CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set @@ -57,6 +55,5 @@ CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SH_STANDARD_BIOS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index 1faa788aecae48754cadcb0cd8ff4600213f20bf..aedb3a2d9a10857377fe80c5e6a3a416f3bf8b96 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y @@ -30,14 +29,10 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -53,8 +48,6 @@ CONFIG_SMSC_PHY=y CONFIG_NET_ETHERNET=y CONFIG_SH_ETH=y CONFIG_SMC91X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set @@ -137,8 +130,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig index 912c98590e2215c69dde022f9698862f93fb4da5..b23f67542728cc3120be2b6a7de85f79eb518138 100644 --- a/arch/sh/configs/se7750_defconfig +++ b/arch/sh/configs/se7750_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y @@ -25,11 +24,8 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -58,5 +54,4 @@ CONFIG_ROOT_NFS=y CONFIG_PARTITION_ADVANCED=y # CONFIG_MSDOS_PARTITION is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig index 56b5e4ce8d4ac8a8de73074c9b90f19c72e21323..16234368393752d98630bb36049c58e313bbb4ab 100644 --- a/arch/sh/configs/se7751_defconfig +++ b/arch/sh/configs/se7751_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_LOG_BUF_SHIFT=14 @@ -25,12 +24,9 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y -CONFIG_IP_NF_QUEUE=y CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -48,5 +44,4 @@ CONFIG_EXT2_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/se7780_defconfig b/arch/sh/configs/se7780_defconfig index b0ef63ce525aa6330a615fb243c6379381645db7..ec32c82646ed68c6df8e729a6747bdf53a566b9d 100644 --- a/arch/sh/configs/se7780_defconfig +++ b/arch/sh/configs/se7780_defconfig @@ -24,7 +24,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y -# CONFIG_INET_LRO is not set CONFIG_IPV6=y # CONFIG_INET6_XFRM_MODE_TRANSPORT is not set # CONFIG_INET6_XFRM_MODE_TUNNEL is not set @@ -32,8 +31,6 @@ CONFIG_IPV6=y # CONFIG_IPV6_SIT is not set # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -54,8 +51,6 @@ CONFIG_SMSC_PHY=y CONFIG_NET_ETHERNET=y CONFIG_SMC91X=y CONFIG_NET_PCI=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set @@ -94,7 +89,6 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_OHCI_HCD=y @@ -110,5 +104,4 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/secureedge5410_defconfig b/arch/sh/configs/secureedge5410_defconfig index 7eae4e59d7f0437d399ae6a3ff92b00bf100ec13..360592d63a2f5dcd21d2685e3deb09b125917184 100644 --- a/arch/sh/configs/secureedge5410_defconfig +++ b/arch/sh/configs/secureedge5410_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y @@ -18,12 +17,9 @@ CONFIG_INET=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK_RO=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -34,14 +30,11 @@ CONFIG_MTD_CFI_GEOMETRY=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_PLATRAM=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_NET_PCI=y CONFIG_8139CP=y CONFIG_8139TOO=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -51,7 +44,6 @@ CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1302=y @@ -60,4 +52,3 @@ CONFIG_EXT2_FS=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_ROMFS_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 0cf4097b71e8d02051a3976d86b63226ab2b8924..2156223405a160df5031ba0dcff300118a979262 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y @@ -34,7 +33,6 @@ CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set @@ -70,7 +68,6 @@ CONFIG_EXT2_FS_XATTR=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_AUTOFS_FS=y CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y @@ -126,7 +123,6 @@ CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=m CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SH_STANDARD_BIOS=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_HMAC=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index df25ae774ee0d2a3fcf46448413393b673b623d9..34094e05e892b888712f9e963d990e2903ba80a5 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -42,7 +41,6 @@ CONFIG_NET_IPIP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETWORK_SECMARK=y CONFIG_NET_PKTGEN=y @@ -50,7 +48,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_CDROM_PKTCDVD=y -# CONFIG_MISC_DEVICES is not set CONFIG_RAID_ATTRS=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y @@ -72,8 +69,6 @@ CONFIG_TUN=y CONFIG_VETH=y CONFIG_NET_ETHERNET=y CONFIG_SMSC911X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set CONFIG_INPUT_FF_MEMLESS=y # CONFIG_INPUT_MOUSEDEV is not set @@ -95,9 +90,7 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_LOGO=y -# CONFIG_HID_SUPPORT is not set CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y @@ -172,7 +165,6 @@ CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set CONFIG_DEBUG_INFO=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SH_STANDARD_BIOS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_AUTHENC=y diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig index f92ad17cd629ffa93eae80be7dac2315e7f6e468..65a1aad899c8ea2a69a4809fb1b7253e4a3c58f6 100644 --- a/arch/sh/configs/sh7710voipgw_defconfig +++ b/arch/sh/configs/sh7710voipgw_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -24,7 +23,6 @@ CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y @@ -36,8 +34,6 @@ CONFIG_NET_CLS_ROUTE4=y CONFIG_NET_CLS_U32=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -59,5 +55,4 @@ CONFIG_THERMAL=y # CONFIG_DNOTIFY is not set CONFIG_JFFS2_FS=y CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig index f83ac7b0b031185c5e2f5e353c048228e03fa737..d15e53647983a33b90f8fbf4c1db29a6ec27ba44 100644 --- a/arch/sh/configs/sh7724_generic_defconfig +++ b/arch/sh/configs/sh7724_generic_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_CGROUPS=y @@ -18,7 +17,6 @@ CONFIG_HIBERNATION=y CONFIG_CPU_IDLE=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_MISC_DEVICES is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -44,5 +42,4 @@ CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRC32 is not set diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig index cfde98ddb29d3e437b1c49d07d1e659c09158c1c..b0c4bc830fb8c968da640fd75dd88cb7b5efaeac 100644 --- a/arch/sh/configs/sh7757lcr_defconfig +++ b/arch/sh/configs/sh7757lcr_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -32,13 +31,11 @@ CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set CONFIG_IPV6=y # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_M25P80=y CONFIG_BLK_DEV_RAM=y @@ -48,7 +45,6 @@ CONFIG_NETDEVICES=y CONFIG_VITESSE_PHY=y CONFIG_NET_ETHERNET=y CONFIG_SH_ETH=y -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set # CONFIG_KEYBOARD_ATKBD is not set # CONFIG_MOUSE_PS2 is not set diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index 4795364402643eb80c4cf919f72d4a03c267ca81..2ef780fb9813c9278bb63f0ef410b1df0e21f984 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -26,11 +25,9 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLKDEVS=y CONFIG_MTD_CFI=y @@ -43,14 +40,11 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_CFI_STAA=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y -# CONFIG_MISC_DEVICES is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SH_ETH=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -65,7 +59,6 @@ CONFIG_FB_FOREIGN_ENDIAN=y CONFIG_FB_SH7760=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y -# CONFIG_HID_SUPPORT is not set CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y @@ -74,7 +67,6 @@ CONFIG_MMC=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_AUTOFS_FS=y CONFIG_AUTOFS4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y @@ -124,6 +116,5 @@ CONFIG_NLS_UTF8=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_T10DIF=y diff --git a/arch/sh/configs/sh7770_generic_defconfig b/arch/sh/configs/sh7770_generic_defconfig index 025bd3ac5ab08799556e37bc462f1833247070a4..742634b37c0a0f74a54e6a36c3984ab6c6790d93 100644 --- a/arch/sh/configs/sh7770_generic_defconfig +++ b/arch/sh/configs/sh7770_generic_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_CGROUPS=y @@ -20,7 +19,6 @@ CONFIG_HIBERNATION=y CONFIG_CPU_IDLE=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_MISC_DEVICES is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -46,5 +44,4 @@ CONFIG_UIO_PDRV_GENIRQ=y # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRC32 is not set diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index 2fce54d9c388496e8c3de442e6c3ddb835acddaf..2ddf5ca7094ebcb25d0e43440bb6ace4b2899dea 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y @@ -44,13 +43,9 @@ CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -58,7 +53,6 @@ CONFIG_MTD_PHYSMAP=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set @@ -69,7 +63,6 @@ CONFIG_NET_ETHERNET=y CONFIG_NET_VENDOR_3COM=y CONFIG_VORTEX=y CONFIG_R8169=y -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set CONFIG_INPUT_FF_MEMLESS=m CONFIG_INPUT_EVDEV=y @@ -113,7 +106,6 @@ CONFIG_SND_CMIPCI=y CONFIG_SND_EMU10K1=y # CONFIG_SND_SUPERH is not set CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_R8A66597_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y @@ -154,9 +146,7 @@ CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_LATENCYTOP=y -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_FTRACE is not set CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig index d29da4a0f6c271f0a4b3b90d4b19d1710e3e3081..7098828d392e2102d738328e4c4828fbb556cb9e 100644 --- a/arch/sh/configs/sh7785lcr_defconfig +++ b/arch/sh/configs/sh7785lcr_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y @@ -26,27 +25,21 @@ CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_ATA=y CONFIG_SATA_SIL=y CONFIG_NETDEVICES=y CONFIG_R8169=y -# CONFIG_NETDEV_10000 is not set CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_KEYBOARD_ATKBD is not set @@ -121,8 +114,6 @@ CONFIG_NLS_ISO8859_1=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig index 4802e14a4649aa977af83b8af6e8e6916e437147..d589cfdfb7ebb11a62008a5a0e0ab2155499ba67 100644 --- a/arch/sh/configs/shmin_defconfig +++ b/arch/sh/configs/shmin_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_LOG_BUF_SHIFT=14 # CONFIG_UID16 is not set @@ -28,10 +27,8 @@ CONFIG_NET=y CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_PNP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y @@ -53,6 +50,5 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SH_STANDARD_BIOS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 4a4269ad5b04539d5b716775a8552e6f4853951f..755c4f73c71838635774e8f2a3d50e62df9fc254 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y @@ -56,7 +55,6 @@ CONFIG_NET=y CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set CONFIG_CAN=m CONFIG_CAN_RAW=m CONFIG_CAN_BCM=m @@ -70,8 +68,6 @@ CONFIG_PATA_PLATFORM=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y CONFIG_SMC91X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -82,7 +78,6 @@ CONFIG_I2C=m CONFIG_SPI=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_R8A66597_HCD=m @@ -104,7 +99,6 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_VM=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SH_STANDARD_BIOS=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index a77b778c745b98ea76292d76b2309959a739b08a..ceb48e9b70f4fbc6d8b2af38dffe888b3b739e35 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -49,7 +48,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y -# CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_IPV6=y CONFIG_IPV6_PRIVACY=y @@ -79,7 +77,6 @@ CONFIG_NETFILTER_XT_MATCH_REALM=m CONFIG_NETFILTER_XT_MATCH_SCTP=m CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_AH=m @@ -88,7 +85,6 @@ CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m @@ -96,7 +92,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP6_NF_QUEUE=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -106,7 +101,6 @@ CONFIG_IP6_NF_MATCH_HL=m CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_RT=m CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_LOG=m CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP6_NF_MANGLE=m @@ -154,7 +148,6 @@ CONFIG_FW_LOADER=m CONFIG_CONNECTOR=m CONFIG_MTD=m CONFIG_MTD_DEBUG=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=m CONFIG_FTL=m CONFIG_NFTL=m @@ -261,7 +254,6 @@ CONFIG_NLS_UTF8=m CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_MD4=m diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig index 2d288b887fbdd8c56bb45c6a6d67273abc8fac77..5f2921a8519265f4c1b028d8bbac3b200c39d923 100644 --- a/arch/sh/configs/ul2_defconfig +++ b/arch/sh/configs/ul2_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y @@ -29,7 +28,6 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_CFG80211=y CONFIG_MAC80211=y @@ -37,9 +35,6 @@ CONFIG_MAC80211_RC_PID=y # CONFIG_MAC80211_RC_MINSTREL is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -50,8 +45,6 @@ CONFIG_ATA=y CONFIG_PATA_PLATFORM=y CONFIG_NETDEVICES=y CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_LIBERTAS=m CONFIG_LIBERTAS_SDIO=m CONFIG_LIBERTAS_DEBUG=y @@ -70,7 +63,6 @@ CONFIG_SERIAL_SH_SCI_CONSOLE=y # CONFIG_UNIX98_PTYS is not set # CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set -# CONFIG_HID_SUPPORT is not set CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_R8A66597_HCD=y @@ -92,6 +84,5 @@ CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_CRYPTO_MICHAEL_MIC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 01c9a91ee896bbcfce76741ccdb72511e1909feb..7d5591b7c0886dc6ff10a77e53122bd14b408fa3 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y @@ -46,20 +45,15 @@ CONFIG_INET=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y CONFIG_BLK_DEV_RAM=y -# CONFIG_MISC_DEVICES is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_ATA=y @@ -73,7 +67,6 @@ CONFIG_NET_PCI=y CONFIG_8139CP=y CONFIG_SKY2=y CONFIG_SKY2_DEBUG=y -# CONFIG_NETDEV_10000 is not set CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_KEYBOARD_ATKBD is not set @@ -150,8 +143,6 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_INFO=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_FTRACE is not set # CONFIG_DUMP_CODE is not set CONFIG_CRYPTO_HMAC=y diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index a4a626199c47e1afce31559a028df4cda331f619..0be3828752e5bc9ddf33ba74c9e5b9efd03691e4 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -97,6 +97,9 @@ config ARCH_PROC_KCORE_TEXT config CPU_BIG_ENDIAN def_bool y +config CPU_BIG_ENDIAN + def_bool y + config ARCH_ATU bool default y if SPARC64 diff --git a/arch/sparc/include/asm/vga.h b/arch/sparc/include/asm/vga.h index ec0e9967d93d5904eeb9b9c1ab10a1794321c810..f54e8b6fb1972e8282f38c792cb77e9793950fd0 100644 --- a/arch/sparc/include/asm/vga.h +++ b/arch/sparc/include/asm/vga.h @@ -8,9 +8,13 @@ #define _LINUX_ASM_VGA_H_ #include +#include #include #define VT_BUF_HAVE_RW +#define VT_BUF_HAVE_MEMSETW +#define VT_BUF_HAVE_MEMCPYW +#define VT_BUF_HAVE_MEMMOVEW #undef scr_writew #undef scr_readw @@ -29,6 +33,27 @@ static inline u16 scr_readw(const u16 *addr) return *addr; } +static inline void scr_memsetw(u16 *p, u16 v, unsigned int n) +{ + BUG_ON((long) p >= 0); + + memset16(p, cpu_to_le16(v), n / 2); +} + +static inline void scr_memcpyw(u16 *d, u16 *s, unsigned int n) +{ + BUG_ON((long) d >= 0); + + memcpy(d, s, n); +} + +static inline void scr_memmovew(u16 *d, u16 *s, unsigned int n) +{ + BUG_ON((long) d >= 0); + + memmove(d, s, n); +} + #define VGA_MAP_MEM(x,s) (x) #endif diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 443a70bccc1c862d945d3a84945c6771033b74c7..6becb96c60a03c5515cb443b5cdc7c683e259433 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1200,7 +1200,7 @@ static void __init validate_hv(void) * We use a struct cpumask for this, so it must be big enough. */ if ((smp_height * smp_width) > nr_cpu_ids) - early_panic("Hypervisor %d x %d grid too big for Linux NR_CPUS %d\n", + early_panic("Hypervisor %d x %d grid too big for Linux NR_CPUS %u\n", smp_height, smp_width, nr_cpu_ids); #endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4b278a33ccbb70432c34a1c52588e670f35b5365..a3e6e6136a47ad9ad01917c8f244dd2dd9d0b0ed 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2323,6 +2323,10 @@ source "kernel/livepatch/Kconfig" endmenu +config ARCH_HAS_ADD_PAGES + def_bool y + depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG + config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y depends on X86_64 || (X86_32 && HIGHMEM) @@ -2343,6 +2347,10 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on X86_64 && HUGETLB_PAGE && MIGRATION +config ARCH_ENABLE_THP_MIGRATION + def_bool y + depends on X86_64 && TRANSPARENT_HUGEPAGE + menu "Power management and ACPI options" config ARCH_HIBERNATION_HEADER diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index bbeae4a2bd01a3209e6d68f8f2af918eeb17dff2..5b4c44d419c55cafbafbddc8562e5c4b8d074c29 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1172,6 +1172,23 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY); } + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY); +} + +static inline int pmd_swp_soft_dirty(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY; +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY); +} +#endif #endif #define PKRU_AD_BIT 0x1 diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 2160c1fee9209d7ca7336ee367a0dbadc41d1b55..972a4698c530dc52c98d62247ed3377cf8ad1cf5 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -180,15 +180,21 @@ static inline int pgd_large(pgd_t pgd) { return 0; } /* * Encode and de-code a swap entry * - * | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number - * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names - * | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry + * | ... | 11| 10| 9|8|7|6|5| 4| 3|2| 1|0| <- bit number + * | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names + * | OFFSET (14->63) | TYPE (9-13) |0|0|X|X| X| X|X|SD|0| <- swp entry * * G (8) is aliased and used as a PROT_NONE indicator for * !present ptes. We need to start storing swap entries above * there. We also need to avoid using A and D because of an * erratum where they can be incorrectly set by hardware on * non-present PTEs. + * + * SD (1) in swp entry is used to store soft dirty bit, which helps us + * remember soft dirty over page migration + * + * Bit 7 in swp entry should be 0 because pmd_present checks not only P, + * but also L and G. */ #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) #define SWP_TYPE_BITS 5 @@ -204,7 +210,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; } ((type) << (SWP_TYPE_FIRST_BIT)) \ | ((offset) << SWP_OFFSET_FIRST_BIT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) +#define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val((pmd)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) +#define __swp_entry_to_pmd(x) ((pmd_t) { .pmd = (x).val }) extern int kern_addr_valid(unsigned long addr); extern void cleanup_highmap(void); diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 399261ce904ca1df269e5194b8e523d73b8a3f69..f1492473f10e050d5e877c02bf4e9c06b71f2fd9 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -99,15 +99,15 @@ /* * Tracking soft dirty bit when a page goes to a swap is tricky. * We need a bit which can be stored in pte _and_ not conflict - * with swap entry format. On x86 bits 6 and 7 are *not* involved - * into swap entry computation, but bit 6 is used for nonlinear - * file mapping, so we borrow bit 7 for soft dirty tracking. + * with swap entry format. On x86 bits 1-4 are *not* involved + * into swap entry computation, but bit 7 is used for thp migration, + * so we borrow bit 1 for soft dirty tracking. * * Please note that this bit must be treated as swap dirty page - * mark if and only if the PTE has present bit clear! + * mark if and only if the PTE/PMD has present bit clear! */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _PAGE_SWP_SOFT_DIRTY _PAGE_PSE +#define _PAGE_SWP_SOFT_DIRTY _PAGE_RW #else #define _PAGE_SWP_SOFT_DIRTY (_AT(pteval_t, 0)) #endif diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index e9ee84873de50a3b60a4cf8e978d4bf6eae9665f..e371e7229042a567cf487b4426641b7c2cf9e947 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -340,6 +340,30 @@ extern void *memset(void *, int, size_t); #endif #endif /* !CONFIG_FORTIFY_SOURCE */ +#define __HAVE_ARCH_MEMSET16 +static inline void *memset16(uint16_t *s, uint16_t v, size_t n) +{ + int d0, d1; + asm volatile("rep\n\t" + "stosw" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +#define __HAVE_ARCH_MEMSET32 +static inline void *memset32(uint32_t *s, uint32_t v, size_t n) +{ + int d0, d1; + asm volatile("rep\n\t" + "stosl" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + /* * find the first occurrence of byte 'c', or 1 past the area if none */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 2a8c822de1fc695c3bc18768c51494632043d6fe..f372a70a523f10f07509d95b30b10f620a29d46e 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -58,6 +58,42 @@ extern void *__memcpy(void *to, const void *from, size_t len); void *memset(void *s, int c, size_t n); void *__memset(void *s, int c, size_t n); +#define __HAVE_ARCH_MEMSET16 +static inline void *memset16(uint16_t *s, uint16_t v, size_t n) +{ + long d0, d1; + asm volatile("rep\n\t" + "stosw" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +#define __HAVE_ARCH_MEMSET32 +static inline void *memset32(uint32_t *s, uint32_t v, size_t n) +{ + long d0, d1; + asm volatile("rep\n\t" + "stosl" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + +#define __HAVE_ARCH_MEMSET64 +static inline void *memset64(uint64_t *s, uint64_t v, size_t n) +{ + long d0, d1; + asm volatile("rep\n\t" + "stosq" + : "=&c" (d0), "=&D" (d1) + : "a" (v), "1" (s), "0" (n) + : "memory"); + return s; +} + #define __HAVE_ARCH_MEMMOVE void *memmove(void *dest, const void *src, size_t count); void *__memmove(void *dest, const void *src, size_t count); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7834f73efbf1ec0871d25a6a01b35bb6b5221527..8315e2f517a7ef609e8d43b924a2734f7320e9bb 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2097,7 +2097,7 @@ static int allocate_logical_cpuid(int apicid) /* Allocate a new cpuid. */ if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. " + WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " "Processor %d/0x%x and the rest are ignored.\n", nr_cpu_ids, nr_logical_cpuids, apicid); return -EINVAL; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 6e8fcb6f7e1e43b2d401380794245d43735a502b..28dafed6c68279f46963f89c4a8ce4b01d092c57 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -168,7 +168,7 @@ void __init setup_per_cpu_areas(void) unsigned long delta; int rc; - pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", + pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 54b9e89d4d6be3844b8b3c310433467d0e5f1481..cd6622c3204e6b4c5c159a659b0b23f4728da34a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1461,7 +1461,7 @@ __init void prefill_possible_map(void) /* nr_cpu_ids could be reduced via nr_cpus= */ if (possible > nr_cpu_ids) { - pr_warn("%d Processors exceeds NR_CPUS limit of %d\n", + pr_warn("%d Processors exceeds NR_CPUS limit of %u\n", possible, nr_cpu_ids); possible = nr_cpu_ids; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 136422d7d53992adb326c419d569ff54e3d715a1..048fbe8fc274017cb6584e6b38fed174c457fc99 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -761,7 +761,7 @@ void __init paging_init(void) * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need * updating. */ -static void update_end_of_memory_vars(u64 start, u64 size) +static void update_end_of_memory_vars(u64 start, u64 size) { unsigned long end_pfn = PFN_UP(start + size); @@ -772,22 +772,30 @@ static void update_end_of_memory_vars(u64 start, u64 size) } } -int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +int add_pages(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool want_memblock) { - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - init_memory_mapping(start, start + size); - ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ - update_end_of_memory_vars(start, size); + update_end_of_memory_vars(start_pfn << PAGE_SHIFT, + nr_pages << PAGE_SHIFT); return ret; } + +int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + init_memory_mapping(start, start + size); + + return add_pages(nid, start_pfn, nr_pages, want_memblock); +} EXPORT_SYMBOL_GPL(arch_add_memory); #define PAGE_INUSE 0xFD diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 9b86e9b352e9f806b57e84e7b4875705a03356ed..9f342ef1ad426fa7d60e00fffeb313409cbbb3f4 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -93,13 +93,14 @@ struct cfq_ttime { * move this into the elevator for the rq sorting as well. */ struct cfq_rb_root { - struct rb_root rb; - struct rb_node *left; + struct rb_root_cached rb; + struct rb_node *rb_rightmost; unsigned count; u64 min_vdisktime; struct cfq_ttime ttime; }; -#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \ +#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT_CACHED, \ + .rb_rightmost = NULL, \ .ttime = {.last_end_request = ktime_get_ns(),},} /* @@ -981,10 +982,9 @@ static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) static void update_min_vdisktime(struct cfq_rb_root *st) { - struct cfq_group *cfqg; + if (!RB_EMPTY_ROOT(&st->rb.rb_root)) { + struct cfq_group *cfqg = rb_entry_cfqg(st->rb.rb_leftmost); - if (st->left) { - cfqg = rb_entry_cfqg(st->left); st->min_vdisktime = max_vdisktime(st->min_vdisktime, cfqg->vdisktime); } @@ -1166,46 +1166,28 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, } } -/* - * The below is leftmost cache rbtree addon - */ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) { /* Service tree is empty */ if (!root->count) return NULL; - if (!root->left) - root->left = rb_first(&root->rb); - - if (root->left) - return rb_entry(root->left, struct cfq_queue, rb_node); - - return NULL; + return rb_entry(rb_first_cached(&root->rb), struct cfq_queue, rb_node); } static struct cfq_group *cfq_rb_first_group(struct cfq_rb_root *root) { - if (!root->left) - root->left = rb_first(&root->rb); - - if (root->left) - return rb_entry_cfqg(root->left); - - return NULL; + return rb_entry_cfqg(rb_first_cached(&root->rb)); } -static void rb_erase_init(struct rb_node *n, struct rb_root *root) +static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) { - rb_erase(n, root); + if (root->rb_rightmost == n) + root->rb_rightmost = rb_prev(n); + + rb_erase_cached(n, &root->rb); RB_CLEAR_NODE(n); -} -static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) -{ - if (root->left == n) - root->left = NULL; - rb_erase_init(n, &root->rb); --root->count; } @@ -1255,29 +1237,30 @@ cfqg_key(struct cfq_rb_root *st, struct cfq_group *cfqg) static void __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) { - struct rb_node **node = &st->rb.rb_node; + struct rb_node **node = &st->rb.rb_root.rb_node; struct rb_node *parent = NULL; struct cfq_group *__cfqg; s64 key = cfqg_key(st, cfqg); - int left = 1; + bool leftmost = true, rightmost = true; while (*node != NULL) { parent = *node; __cfqg = rb_entry_cfqg(parent); - if (key < cfqg_key(st, __cfqg)) + if (key < cfqg_key(st, __cfqg)) { node = &parent->rb_left; - else { + rightmost = false; + } else { node = &parent->rb_right; - left = 0; + leftmost = false; } } - if (left) - st->left = &cfqg->rb_node; + if (rightmost) + st->rb_rightmost = &cfqg->rb_node; rb_link_node(&cfqg->rb_node, parent, node); - rb_insert_color(&cfqg->rb_node, &st->rb); + rb_insert_color_cached(&cfqg->rb_node, &st->rb, leftmost); } /* @@ -1378,7 +1361,7 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) * so that groups get lesser vtime based on their weights, so that * if group does not loose all if it was not continuously backlogged. */ - n = rb_last(&st->rb); + n = st->rb_rightmost; if (n) { __cfqg = rb_entry_cfqg(n); cfqg->vdisktime = __cfqg->vdisktime + @@ -2220,14 +2203,14 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_queue *__cfqq; u64 rb_key; struct cfq_rb_root *st; - int left; + bool leftmost = true; int new_cfqq = 1; u64 now = ktime_get_ns(); st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); if (cfq_class_idle(cfqq)) { rb_key = CFQ_IDLE_DELAY; - parent = rb_last(&st->rb); + parent = st->rb_rightmost; if (parent && parent != &cfqq->rb_node) { __cfqq = rb_entry(parent, struct cfq_queue, rb_node); rb_key += __cfqq->rb_key; @@ -2261,10 +2244,9 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqq->service_tree = NULL; } - left = 1; parent = NULL; cfqq->service_tree = st; - p = &st->rb.rb_node; + p = &st->rb.rb_root.rb_node; while (*p) { parent = *p; __cfqq = rb_entry(parent, struct cfq_queue, rb_node); @@ -2276,16 +2258,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, p = &parent->rb_left; else { p = &parent->rb_right; - left = 0; + leftmost = false; } } - if (left) - st->left = &cfqq->rb_node; - cfqq->rb_key = rb_key; rb_link_node(&cfqq->rb_node, parent, p); - rb_insert_color(&cfqq->rb_node, &st->rb); + rb_insert_color_cached(&cfqq->rb_node, &st->rb, leftmost); st->count++; if (add_front || !new_cfqq) return; @@ -2732,7 +2711,7 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) /* There is nothing to dispatch */ if (!st) return NULL; - if (RB_EMPTY_ROOT(&st->rb)) + if (RB_EMPTY_ROOT(&st->rb.rb_root)) return NULL; return cfq_rb_first(st); } @@ -3219,7 +3198,7 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) struct cfq_rb_root *st = &cfqd->grp_service_tree; struct cfq_group *cfqg; - if (RB_EMPTY_ROOT(&st->rb)) + if (RB_EMPTY_ROOT(&st->rb.rb_root)) return NULL; cfqg = cfq_rb_first_group(st); update_min_vdisktime(st); diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 2c3b359b3536a15cdb21fbc60af80644368c655d..321cd7b4d817fd6ffd9323362041c7d4cb29b7e9 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -256,9 +256,9 @@ static ssize_t print_cpus_offline(struct device *dev, buf[n++] = ','; if (nr_cpu_ids == total_cpus-1) - n += snprintf(&buf[n], len - n, "%d", nr_cpu_ids); + n += snprintf(&buf[n], len - n, "%u", nr_cpu_ids); else - n += snprintf(&buf[n], len - n, "%d-%d", + n += snprintf(&buf[n], len - n, "%u-%d", nr_cpu_ids, total_cpus-1); } diff --git a/drivers/base/node.c b/drivers/base/node.c index d8dc83017d8dc09f27c5c7eff657e05cf6dd028e..3855902f2c5b369dc538759950f5b4c951c484d6 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -160,12 +160,12 @@ static ssize_t node_read_numastat(struct device *dev, "interleave_hit %lu\n" "local_node %lu\n" "other_node %lu\n", - sum_zone_node_page_state(dev->id, NUMA_HIT), - sum_zone_node_page_state(dev->id, NUMA_MISS), - sum_zone_node_page_state(dev->id, NUMA_FOREIGN), - sum_zone_node_page_state(dev->id, NUMA_INTERLEAVE_HIT), - sum_zone_node_page_state(dev->id, NUMA_LOCAL), - sum_zone_node_page_state(dev->id, NUMA_OTHER)); + sum_zone_numa_state(dev->id, NUMA_HIT), + sum_zone_numa_state(dev->id, NUMA_MISS), + sum_zone_numa_state(dev->id, NUMA_FOREIGN), + sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT), + sum_zone_numa_state(dev->id, NUMA_LOCAL), + sum_zone_numa_state(dev->id, NUMA_OTHER)); } static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL); @@ -181,9 +181,17 @@ static ssize_t node_read_vmstat(struct device *dev, n += sprintf(buf+n, "%s %lu\n", vmstat_text[i], sum_zone_node_page_state(nid, i)); - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) n += sprintf(buf+n, "%s %lu\n", vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], + sum_zone_numa_state(nid, i)); +#endif + + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + n += sprintf(buf+n, "%s %lu\n", + vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS], node_page_state(pgdat, i)); return n; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4063f3f59f4f35ffde8f95dc162df35c754d93bd..2981c27d3aae316fedd267c6c79d76eb68a9395e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -175,20 +175,11 @@ static inline void update_used_max(struct zram *zram, } while (old_max != cur_max); } -static inline void zram_fill_page(char *ptr, unsigned long len, +static inline void zram_fill_page(void *ptr, unsigned long len, unsigned long value) { - int i; - unsigned long *page = (unsigned long *)ptr; - WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long))); - - if (likely(value == 0)) { - memset(ptr, 0, len); - } else { - for (i = 0; i < len / sizeof(*page); i++) - page[i] = value; - } + memset_l(ptr, value, len / sizeof(unsigned long)); } static bool page_same_filled(void *ptr, unsigned long *element) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index e1cde6b80027fe1cad56b6169c38aadac1974e3e..3b0f2ec6eec7d257f5ccb956ff2b318973c5c4f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -51,7 +51,7 @@ struct amdgpu_mn { /* objects protected by lock */ struct mutex lock; - struct rb_root objects; + struct rb_root_cached objects; }; struct amdgpu_mn_node { @@ -76,8 +76,8 @@ static void amdgpu_mn_destroy(struct work_struct *work) mutex_lock(&adev->mn_lock); mutex_lock(&rmn->lock); hash_del(&rmn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, - it.rb) { + rbtree_postorder_for_each_entry_safe(node, next_node, + &rmn->objects.rb_root, it.rb) { list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { bo->mn = NULL; list_del_init(&bo->mn_list); @@ -221,7 +221,7 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) rmn->mm = mm; rmn->mn.ops = &amdgpu_mn_ops; mutex_init(&rmn->lock); - rmn->objects = RB_ROOT; + rmn->objects = RB_ROOT_CACHED; r = __mmu_notifier_register(&rmn->mn, mm); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6b1343e5541d3f2a2c0415cd33593c6c1f188a08..b9a5a77eedaf00c339ef15faac6e1c4302053f2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2475,7 +2475,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u64 flags; uint64_t init_pde_value = 0; - vm->va = RB_ROOT; + vm->va = RB_ROOT_CACHED; vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; @@ -2596,10 +2596,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amd_sched_entity_fini(vm->entity.sched, &vm->entity); - if (!RB_EMPTY_ROOT(&vm->va)) { + if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); } - rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { + rbtree_postorder_for_each_entry_safe(mapping, tmp, + &vm->va.rb_root, rb) { list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); kfree(mapping); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index ba6691b58ee72f4e785bcf83a643065a3e8b89e0..6716355403ec5fefd4ce2438587ea6cdbe6c22b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -118,7 +118,7 @@ struct amdgpu_vm_pt { struct amdgpu_vm { /* tree of virtual addresses mapped */ - struct rb_root va; + struct rb_root_cached va; /* protecting invalidated */ spinlock_t status_lock; diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index f794089d30ac21de22894774f13d4d2871213573..61a1c8ea74bc5838b905a67e55cb80203f8ef58f 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -169,7 +169,7 @@ INTERVAL_TREE_DEFINE(struct drm_mm_node, rb, struct drm_mm_node * __drm_mm_interval_first(const struct drm_mm *mm, u64 start, u64 last) { - return drm_mm_interval_tree_iter_first((struct rb_root *)&mm->interval_tree, + return drm_mm_interval_tree_iter_first((struct rb_root_cached *)&mm->interval_tree, start, last) ?: (struct drm_mm_node *)&mm->head_node; } EXPORT_SYMBOL(__drm_mm_interval_first); @@ -180,6 +180,7 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node, struct drm_mm *mm = hole_node->mm; struct rb_node **link, *rb; struct drm_mm_node *parent; + bool leftmost = true; node->__subtree_last = LAST(node); @@ -196,9 +197,10 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node, rb = &hole_node->rb; link = &hole_node->rb.rb_right; + leftmost = false; } else { rb = NULL; - link = &mm->interval_tree.rb_node; + link = &mm->interval_tree.rb_root.rb_node; } while (*link) { @@ -208,14 +210,15 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node, parent->__subtree_last = node->__subtree_last; if (node->start < parent->start) link = &parent->rb.rb_left; - else + else { link = &parent->rb.rb_right; + leftmost = true; + } } rb_link_node(&node->rb, rb, link); - rb_insert_augmented(&node->rb, - &mm->interval_tree, - &drm_mm_interval_tree_augment); + rb_insert_augmented_cached(&node->rb, &mm->interval_tree, leftmost, + &drm_mm_interval_tree_augment); } #define RB_INSERT(root, member, expr) do { \ @@ -577,7 +580,7 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new) *new = *old; list_replace(&old->node_list, &new->node_list); - rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree); + rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree.rb_root); if (drm_mm_hole_follows(old)) { list_replace(&old->hole_stack, &new->hole_stack); @@ -863,7 +866,7 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size) mm->color_adjust = NULL; INIT_LIST_HEAD(&mm->hole_stack); - mm->interval_tree = RB_ROOT; + mm->interval_tree = RB_ROOT_CACHED; mm->holes_size = RB_ROOT; mm->holes_addr = RB_ROOT; diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c index d9100b5651982c98575f27b32e3efe9efcc8e00b..28f1226576f8c16cb953f48c0e2eafdebff35a0d 100644 --- a/drivers/gpu/drm/drm_vma_manager.c +++ b/drivers/gpu/drm/drm_vma_manager.c @@ -147,7 +147,7 @@ struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_m struct rb_node *iter; unsigned long offset; - iter = mgr->vm_addr_space_mm.interval_tree.rb_node; + iter = mgr->vm_addr_space_mm.interval_tree.rb_root.rb_node; best = NULL; while (likely(iter)) { diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index f152a38d707941f2e9a602c94298b146451a9796..23fd18bd1b56b7f54c7d16cc3d5f1b72a03aaf71 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -49,7 +49,7 @@ struct i915_mmu_notifier { spinlock_t lock; struct hlist_node node; struct mmu_notifier mn; - struct rb_root objects; + struct rb_root_cached objects; struct workqueue_struct *wq; }; @@ -123,7 +123,7 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn, struct interval_tree_node *it; LIST_HEAD(cancelled); - if (RB_EMPTY_ROOT(&mn->objects)) + if (RB_EMPTY_ROOT(&mn->objects.rb_root)) return; /* interval ranges are inclusive, but invalidate range is exclusive */ @@ -172,7 +172,7 @@ i915_mmu_notifier_create(struct mm_struct *mm) spin_lock_init(&mn->lock); mn->mn.ops = &i915_gem_userptr_notifier; - mn->objects = RB_ROOT; + mn->objects = RB_ROOT_CACHED; mn->wq = alloc_workqueue("i915-userptr-release", WQ_UNBOUND, 0); if (mn->wq == NULL) { kfree(mn); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ec63bc5e9de74c2b0d61de04ebc55ae66dfbf382..8cbaeec090c94371ad1bd776b8e84a583fa1e4ba 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -924,7 +924,7 @@ struct radeon_vm_id { struct radeon_vm { struct mutex mutex; - struct rb_root va; + struct rb_root_cached va; /* protecting invalidated and freed */ spinlock_t status_lock; diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c index 896f2cf51e4e530d4c180e596635d250542a53c8..1d62288b7ee3e82ec05149623b9c5120747f1ecf 100644 --- a/drivers/gpu/drm/radeon/radeon_mn.c +++ b/drivers/gpu/drm/radeon/radeon_mn.c @@ -50,7 +50,7 @@ struct radeon_mn { /* objects protected by lock */ struct mutex lock; - struct rb_root objects; + struct rb_root_cached objects; }; struct radeon_mn_node { @@ -75,8 +75,8 @@ static void radeon_mn_destroy(struct work_struct *work) mutex_lock(&rdev->mn_lock); mutex_lock(&rmn->lock); hash_del(&rmn->node); - rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, - it.rb) { + rbtree_postorder_for_each_entry_safe(node, next_node, + &rmn->objects.rb_root, it.rb) { interval_tree_remove(&node->it, &rmn->objects); list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { @@ -205,7 +205,7 @@ static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev) rmn->mm = mm; rmn->mn.ops = &radeon_mn_ops; mutex_init(&rmn->lock); - rmn->objects = RB_ROOT; + rmn->objects = RB_ROOT_CACHED; r = __mmu_notifier_register(&rmn->mn, mm); if (r) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 5e82b408d5227681cbe24b615d77f61e6a8a2ecc..e5c0e635e3717f21bb6182894b11a0d1c3b131f3 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -1185,7 +1185,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) vm->ids[i].last_id_use = NULL; } mutex_init(&vm->mutex); - vm->va = RB_ROOT; + vm->va = RB_ROOT_CACHED; spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->freed); @@ -1232,10 +1232,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) struct radeon_bo_va *bo_va, *tmp; int i, r; - if (!RB_EMPTY_ROOT(&vm->va)) { + if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(rdev->dev, "still active bo inside vm\n"); } - rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) { + rbtree_postorder_for_each_entry_safe(bo_va, tmp, + &vm->va.rb_root, it.rb) { interval_tree_remove(&bo_va->it, &vm->va); r = radeon_bo_reserve(bo_va->bo, false); if (!r) { diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c index d176597b4d78d1efad8bdd6b66e79c142fa9ced7..fc801920e3410471eea0f48f153a5769ed6d7040 100644 --- a/drivers/infiniband/core/umem_rbtree.c +++ b/drivers/infiniband/core/umem_rbtree.c @@ -72,7 +72,7 @@ INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, /* @last is not a part of the interval. See comment for function * node_last. */ -int rbt_ib_umem_for_each_in_range(struct rb_root *root, +int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, u64 start, u64 last, umem_call_back cb, void *cookie) @@ -95,7 +95,7 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root, } EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range); -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root, +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length) { struct umem_odp_node *node; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e0cb998609345e670f1ff651d1121461ef885246..4ab30d832ac5b8a5ac4994b9c4f0b3f3802e019c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -118,7 +118,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, ucontext->closing = 0; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - ucontext->umem_tree = RB_ROOT; + ucontext->umem_tree = RB_ROOT_CACHED; init_rwsem(&ucontext->umem_rwsem); ucontext->odp_mrs_count = 0; INIT_LIST_HEAD(&ucontext->no_private_counters); diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 2f0d285dc27854ef5318000799908df046ba1dbd..175002c046ede7dfae8f92eb6042e7da72323a3a 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -54,7 +54,7 @@ struct mmu_rb_handler { struct mmu_notifier mn; - struct rb_root root; + struct rb_root_cached root; void *ops_arg; spinlock_t lock; /* protect the RB tree */ struct mmu_rb_ops *ops; @@ -108,7 +108,7 @@ int hfi1_mmu_rb_register(void *ops_arg, struct mm_struct *mm, if (!handlr) return -ENOMEM; - handlr->root = RB_ROOT; + handlr->root = RB_ROOT_CACHED; handlr->ops = ops; handlr->ops_arg = ops_arg; INIT_HLIST_NODE(&handlr->mn.hlist); @@ -149,9 +149,9 @@ void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) INIT_LIST_HEAD(&del_list); spin_lock_irqsave(&handler->lock, flags); - while ((node = rb_first(&handler->root))) { + while ((node = rb_first_cached(&handler->root))) { rbnode = rb_entry(node, struct mmu_rb_node, node); - rb_erase(node, &handler->root); + rb_erase_cached(node, &handler->root); /* move from LRU list to delete list */ list_move(&rbnode->list, &del_list); } @@ -300,7 +300,7 @@ static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, { struct mmu_rb_handler *handler = container_of(mn, struct mmu_rb_handler, mn); - struct rb_root *root = &handler->root; + struct rb_root_cached *root = &handler->root; struct mmu_rb_node *node, *ptr = NULL; unsigned long flags; bool added = false; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index c49db7c33979c9245744ead6262c108e16205159..4381c0a9a8738043ef31fb40e89108ec8bc75417 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -227,7 +227,7 @@ static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd, vpn_last = vpn_start + npages - 1; spin_lock(&pd->lock); - usnic_uiom_remove_interval(&pd->rb_root, vpn_start, + usnic_uiom_remove_interval(&pd->root, vpn_start, vpn_last, &rm_intervals); usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd); @@ -379,7 +379,7 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last, (writable) ? IOMMU_WRITE : 0, IOMMU_WRITE, - &pd->rb_root, + &pd->root, &sorted_diff_intervals); if (err) { usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n", @@ -395,7 +395,7 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd, } - err = usnic_uiom_insert_interval(&pd->rb_root, vpn_start, vpn_last, + err = usnic_uiom_insert_interval(&pd->root, vpn_start, vpn_last, (writable) ? IOMMU_WRITE : 0); if (err) { usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n", diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h index 45ca7c1613a7f76a86b8ac81c0d215746490ea04..431efe4143f4039666a54c8fb115223ab7f2f6cb 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom.h @@ -55,7 +55,7 @@ struct usnic_uiom_dev { struct usnic_uiom_pd { struct iommu_domain *domain; spinlock_t lock; - struct rb_root rb_root; + struct rb_root_cached root; struct list_head devs; int dev_cnt; }; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c index 42b4b4c4e452eae8b712fb0ef295f304a7ab7492..d399523206c78b083c42557134ca858afba1325d 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c @@ -100,9 +100,9 @@ static int interval_cmp(void *priv, struct list_head *a, struct list_head *b) } static void -find_intervals_intersection_sorted(struct rb_root *root, unsigned long start, - unsigned long last, - struct list_head *list) +find_intervals_intersection_sorted(struct rb_root_cached *root, + unsigned long start, unsigned long last, + struct list_head *list) { struct usnic_uiom_interval_node *node; @@ -118,7 +118,7 @@ find_intervals_intersection_sorted(struct rb_root *root, unsigned long start, int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last, int flags, int flag_mask, - struct rb_root *root, + struct rb_root_cached *root, struct list_head *diff_set) { struct usnic_uiom_interval_node *interval, *tmp; @@ -175,7 +175,7 @@ void usnic_uiom_put_interval_set(struct list_head *intervals) kfree(interval); } -int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start, +int usnic_uiom_insert_interval(struct rb_root_cached *root, unsigned long start, unsigned long last, int flags) { struct usnic_uiom_interval_node *interval, *tmp; @@ -246,8 +246,9 @@ int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start, return err; } -void usnic_uiom_remove_interval(struct rb_root *root, unsigned long start, - unsigned long last, struct list_head *removed) +void usnic_uiom_remove_interval(struct rb_root_cached *root, + unsigned long start, unsigned long last, + struct list_head *removed) { struct usnic_uiom_interval_node *interval; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h index c0b0b876ab905a574dd23c3a87fa13d8d9ce4990..1d7fc3226bcadebee1c9dce7a2938da5cae3b8c1 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h +++ b/drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h @@ -48,12 +48,12 @@ struct usnic_uiom_interval_node { extern void usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node, - struct rb_root *root); + struct rb_root_cached *root); extern void usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node, - struct rb_root *root); + struct rb_root_cached *root); extern struct usnic_uiom_interval_node * -usnic_uiom_interval_tree_iter_first(struct rb_root *root, +usnic_uiom_interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); extern struct usnic_uiom_interval_node * @@ -63,7 +63,7 @@ usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node, * Inserts {start...last} into {root}. If there are overlaps, * nodes will be broken up and merged */ -int usnic_uiom_insert_interval(struct rb_root *root, +int usnic_uiom_insert_interval(struct rb_root_cached *root, unsigned long start, unsigned long last, int flags); /* @@ -71,7 +71,7 @@ int usnic_uiom_insert_interval(struct rb_root *root, * 'removed.' The caller is responsibile for freeing memory of nodes in * 'removed.' */ -void usnic_uiom_remove_interval(struct rb_root *root, +void usnic_uiom_remove_interval(struct rb_root_cached *root, unsigned long start, unsigned long last, struct list_head *removed); /* @@ -81,7 +81,7 @@ void usnic_uiom_remove_interval(struct rb_root *root, int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last, int flags, int flag_mask, - struct rb_root *root, + struct rb_root_cached *root, struct list_head *diff_set); /* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */ void usnic_uiom_put_interval_set(struct list_head *intervals); diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c index d723be35214827edbada0ecf0d175c8ead3a62bd..3087b0ba7b7f3d708f92cf58ea838bb7685d3edb 100644 --- a/drivers/mtd/nand/denali.c +++ b/drivers/mtd/nand/denali.c @@ -980,9 +980,6 @@ static int denali_erase(struct mtd_info *mtd, int page) return irq_status & INTR__ERASE_COMP ? 0 : NAND_STATUS_FAIL; } -#define DIV_ROUND_DOWN_ULL(ll, d) \ - ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) - static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr, const struct nand_data_interface *conf) { diff --git a/drivers/pps/Kconfig b/drivers/pps/Kconfig index 4b29a7182d7bafd8fc0de8630c60e83bcb07af9c..c6008f296605587660a0c6c4baad34bd18c78403 100644 --- a/drivers/pps/Kconfig +++ b/drivers/pps/Kconfig @@ -19,9 +19,10 @@ menuconfig PPS To compile this driver as a module, choose M here: the module will be called pps_core.ko. +if PPS + config PPS_DEBUG bool "PPS debugging messages" - depends on PPS help Say Y here if you want the PPS support to produce a bunch of debug messages to the system log. Select this if you are having a @@ -29,7 +30,7 @@ config PPS_DEBUG config NTP_PPS bool "PPS kernel consumer support" - depends on PPS && !NO_HZ_COMMON + depends on !NO_HZ_COMMON help This option adds support for direct in-kernel time synchronization using an external PPS signal. @@ -39,3 +40,5 @@ config NTP_PPS source drivers/pps/clients/Kconfig source drivers/pps/generators/Kconfig + +endif # PPS diff --git a/drivers/pps/clients/Kconfig b/drivers/pps/clients/Kconfig index efec021ce662d8b9bb487376108a4c309011c483..7f02a9b1a1fd14bf1e145020076f2dfd45e89fbb 100644 --- a/drivers/pps/clients/Kconfig +++ b/drivers/pps/clients/Kconfig @@ -3,11 +3,9 @@ # comment "PPS clients support" - depends on PPS config PPS_CLIENT_KTIMER tristate "Kernel timer client (Testing client, use for debug)" - depends on PPS help If you say yes here you get support for a PPS debugging client which uses a kernel timer to generate the PPS signal. @@ -17,21 +15,20 @@ config PPS_CLIENT_KTIMER config PPS_CLIENT_LDISC tristate "PPS line discipline" - depends on PPS && TTY + depends on TTY help If you say yes here you get support for a PPS source connected with the CD (Carrier Detect) pin of your serial port. config PPS_CLIENT_PARPORT tristate "Parallel port PPS client" - depends on PPS && PARPORT + depends on PARPORT help If you say yes here you get support for a PPS source connected with the interrupt pin of your parallel port. config PPS_CLIENT_GPIO tristate "PPS client using GPIO" - depends on PPS help If you say yes here you get support for a PPS source using GPIO. To be useful you must also register a platform device diff --git a/drivers/pps/generators/Kconfig b/drivers/pps/generators/Kconfig index 86b59378e71fc85b2b3c8926df36981a7567e99d..e4c4f3dc0728fc5c495b2283d08c180210a29379 100644 --- a/drivers/pps/generators/Kconfig +++ b/drivers/pps/generators/Kconfig @@ -3,11 +3,10 @@ # comment "PPS generators support" - depends on PPS config PPS_GENERATOR_PARPORT tristate "Parallel port PPS signal generator" - depends on PPS && PARPORT && BROKEN + depends on PARPORT && BROKEN help If you say yes here you get support for a PPS signal generator which utilizes STROBE pin of a parallel port to send PPS signals. It uses diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 77a0335eb757de1c3678e267020f55f893f4964c..09ba494f88967351d84b6ab2ed781b89cc848d18 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -5465,7 +5465,7 @@ static int sdebug_driver_probe(struct device * dev) return error; } if (submit_queues > nr_cpu_ids) { - pr_warn("%s: trim submit_queues (was %d) to nr_cpu_ids=%d\n", + pr_warn("%s: trim submit_queues (was %d) to nr_cpu_ids=%u\n", my_name, submit_queues, nr_cpu_ids); submit_queues = nr_cpu_ids; } diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index 15ff285a9f8f746ee7946e65f4f6264a5a524f67..ca360daa6a253c7a9a0f4f29eaeaf5a49a1a8c98 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -4985,13 +4985,10 @@ struct sym_lcb *sym_alloc_lcb (struct sym_hcb *np, u_char tn, u_char ln) * Compute the bus address of this table. */ if (ln && !tp->luntbl) { - int i; - tp->luntbl = sym_calloc_dma(256, "LUNTBL"); if (!tp->luntbl) goto fail; - for (i = 0 ; i < 64 ; i++) - tp->luntbl[i] = cpu_to_scr(vtobus(&np->badlun_sa)); + memset32(tp->luntbl, cpu_to_scr(vtobus(&np->badlun_sa)), 64); tp->head.luntbl_sa = cpu_to_scr(vtobus(tp->luntbl)); } @@ -5077,8 +5074,7 @@ static void sym_alloc_lcb_tags (struct sym_hcb *np, u_char tn, u_char ln) /* * Initialize the task table with invalid entries. */ - for (i = 0 ; i < SYM_CONF_MAX_TASK ; i++) - lp->itlq_tbl[i] = cpu_to_scr(np->notask_ba); + memset32(lp->itlq_tbl, cpu_to_scr(np->notask_ba), SYM_CONF_MAX_TASK); /* * Fill up the tag buffer with tag numbers. @@ -5764,8 +5760,7 @@ int sym_hcb_attach(struct Scsi_Host *shost, struct sym_fw *fw, struct sym_nvram goto attach_failed; np->badlun_sa = cpu_to_scr(SCRIPTB_BA(np, resel_bad_lun)); - for (i = 0 ; i < 64 ; i++) /* 64 luns/target, no less */ - np->badluntbl[i] = cpu_to_scr(vtobus(&np->badlun_sa)); + memset32(np->badluntbl, cpu_to_scr(vtobus(&np->badlun_sa)), 64); /* * Prepare the bus address array that contains the bus diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9cb3f722dce13aea78b5e81152e27ca1346f10f9..d6dbb28245e6d28129e6c7d18b52589d6ba2a2bb 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1271,7 +1271,7 @@ static struct vhost_umem *vhost_umem_alloc(void) if (!umem) return NULL; - umem->umem_tree = RB_ROOT; + umem->umem_tree = RB_ROOT_CACHED; umem->numem = 0; INIT_LIST_HEAD(&umem->umem_list); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index bb7c29b8b9fc83e63cdd6be02acd58690e36cb57..d59a9cc65f9d2b9b05860d0b251105d122477a82 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -71,7 +71,7 @@ struct vhost_umem_node { }; struct vhost_umem { - struct rb_root umem_tree; + struct rb_root_cached umem_tree; struct list_head umem_list; int numem; }; diff --git a/fs/aio.c b/fs/aio.c index 8f01275262997740faefa54301189dfab163835f..b5d69f28d8b1cd53f8cf760d845b6addd6d86ab3 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -373,6 +373,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, pgoff_t idx; int rc; + /* + * We cannot support the _NO_COPY case here, because copy needs to + * happen under the ctx->completion_lock. That does not work with the + * migration workflow of MIGRATE_SYNC_NO_COPY. + */ + if (mode == MIGRATE_SYNC_NO_COPY) + return -EINVAL; + rc = 0; /* mapping->private_lock here protects against the kioctx teardown. */ diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index beef981aa54f34f40478b019f2afe7e0d81d4c7b..4737615f0eaaa4a43e3050b4adc4aa20ba2850a3 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -11,10 +11,21 @@ #include #include + +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include +#include /* This is the range of ioctl() numbers we claim as ours */ #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY @@ -24,17 +35,6 @@ #define AUTOFS_DEV_IOCTL_IOC_COUNT \ (AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD - AUTOFS_DEV_IOCTL_VERSION_CMD) -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #ifdef pr_fmt #undef pr_fmt #endif diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index dd9f1bebb5a3a980b55e5d0fb758c93e4f694722..b7c816f3940402f21002cb6e6cbc391b35cb32b9 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -93,17 +93,17 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) * at the end of the struct. */ static struct autofs_dev_ioctl * - copy_dev_ioctl(struct autofs_dev_ioctl __user *in) +copy_dev_ioctl(struct autofs_dev_ioctl __user *in) { struct autofs_dev_ioctl tmp, *res; - if (copy_from_user(&tmp, in, sizeof(tmp))) + if (copy_from_user(&tmp, in, AUTOFS_DEV_IOCTL_SIZE)) return ERR_PTR(-EFAULT); - if (tmp.size < sizeof(tmp)) + if (tmp.size < AUTOFS_DEV_IOCTL_SIZE) return ERR_PTR(-EINVAL); - if (tmp.size > (PATH_MAX + sizeof(tmp))) + if (tmp.size > AUTOFS_DEV_IOCTL_SIZE + PATH_MAX) return ERR_PTR(-ENAMETOOLONG); res = memdup_user(in, tmp.size); @@ -133,8 +133,8 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) goto out; } - if (param->size > sizeof(*param)) { - err = invalid_str(param->path, param->size - sizeof(*param)); + if (param->size > AUTOFS_DEV_IOCTL_SIZE) { + err = invalid_str(param->path, param->size - AUTOFS_DEV_IOCTL_SIZE); if (err) { pr_warn( "path string terminator missing for cmd(0x%08x)\n", @@ -258,11 +258,6 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) if (err) goto out; - /* - * Find autofs super block that has the device number - * corresponding to the autofs fs we want to open. - */ - filp = dentry_open(&path, O_RDONLY, current_cred()); path_put(&path); if (IS_ERR(filp)) { @@ -451,7 +446,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, dev_t devid; int err = -ENOENT; - if (param->size <= sizeof(*param)) { + if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { err = -EINVAL; goto out; } @@ -539,7 +534,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, unsigned int devid, magic; int err = -ENOENT; - if (param->size <= sizeof(*param)) { + if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { err = -EINVAL; goto out; } @@ -628,10 +623,6 @@ static int _autofs_dev_ioctl(unsigned int command, ioctl_fn fn = NULL; int err = 0; - /* only root can play with this */ - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST); cmd = _IOC_NR(command); @@ -640,6 +631,14 @@ static int _autofs_dev_ioctl(unsigned int command, return -ENOTTY; } + /* Only root can use ioctls other than AUTOFS_DEV_IOCTL_VERSION_CMD + * and AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD + */ + if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD && + cmd != AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + /* Copy the parameters into kernel space. */ param = copy_dev_ioctl(user); if (IS_ERR(param)) @@ -706,7 +705,8 @@ static int _autofs_dev_ioctl(unsigned int command, return err; } -static long autofs_dev_ioctl(struct file *file, uint command, ulong u) +static long autofs_dev_ioctl(struct file *file, unsigned int command, + unsigned long u) { int err; @@ -715,9 +715,10 @@ static long autofs_dev_ioctl(struct file *file, uint command, ulong u) } #ifdef CONFIG_COMPAT -static long autofs_dev_ioctl_compat(struct file *file, uint command, ulong u) +static long autofs_dev_ioctl_compat(struct file *file, unsigned int command, + unsigned long u) { - return (long) autofs_dev_ioctl(file, command, (ulong) compat_ptr(u)); + return autofs_dev_ioctl(file, command, (unsigned long) compat_ptr(u)); } #else #define autofs_dev_ioctl_compat NULL @@ -733,7 +734,8 @@ static const struct file_operations _dev_ioctl_fops = { static struct miscdevice _autofs_dev_ioctl_misc = { .minor = AUTOFS_MINOR, .name = AUTOFS_DEVICE_NAME, - .fops = &_dev_ioctl_fops + .fops = &_dev_ioctl_fops, + .mode = 0644, }; MODULE_ALIAS_MISCDEV(AUTOFS_MINOR); diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 604a176df0c2a4f1b9b7ee9a465b3a36a47b3c26..ce6537c50ec140199c6d5b7c6d161afbcf5a8529 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -192,13 +192,11 @@ static int decompress_exec( memset(&strm, 0, sizeof(strm)); strm.workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL); - if (strm.workspace == NULL) { - pr_debug("no memory for decompress workspace\n"); + if (!strm.workspace) return -ENOMEM; - } + buf = kmalloc(LBUFSIZE, GFP_KERNEL); - if (buf == NULL) { - pr_debug("no memory for read buffer\n"); + if (!buf) { retval = -ENOMEM; goto out_free; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index adbe328b957c1e386cc354b8218fec185a992bff..2fabd19cdeea76e32f51f3c84348a22b9fe22843 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -205,7 +205,7 @@ struct eventpoll { struct list_head rdllist; /* RB tree root used to store monitored fd structs */ - struct rb_root rbr; + struct rb_root_cached rbr; /* * This is a single linked list that chains all the "struct epitem" that @@ -796,7 +796,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) list_del_rcu(&epi->fllink); spin_unlock(&file->f_lock); - rb_erase(&epi->rbn, &ep->rbr); + rb_erase_cached(&epi->rbn, &ep->rbr); spin_lock_irqsave(&ep->lock, flags); if (ep_is_linked(&epi->rdllink)) @@ -840,7 +840,7 @@ static void ep_free(struct eventpoll *ep) /* * Walks through the whole tree by unregistering poll callbacks. */ - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); ep_unregister_pollwait(ep, epi); @@ -856,7 +856,7 @@ static void ep_free(struct eventpoll *ep) * a lockdep warning. */ mutex_lock(&ep->mtx); - while ((rbp = rb_first(&ep->rbr)) != NULL) { + while ((rbp = rb_first_cached(&ep->rbr)) != NULL) { epi = rb_entry(rbp, struct epitem, rbn); ep_remove(ep, epi); cond_resched(); @@ -963,7 +963,7 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f) struct rb_node *rbp; mutex_lock(&ep->mtx); - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { struct epitem *epi = rb_entry(rbp, struct epitem, rbn); struct inode *inode = file_inode(epi->ffd.file); @@ -1040,7 +1040,7 @@ static int ep_alloc(struct eventpoll **pep) init_waitqueue_head(&ep->wq); init_waitqueue_head(&ep->poll_wait); INIT_LIST_HEAD(&ep->rdllist); - ep->rbr = RB_ROOT; + ep->rbr = RB_ROOT_CACHED; ep->ovflist = EP_UNACTIVE_PTR; ep->user = user; @@ -1066,7 +1066,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) struct epoll_filefd ffd; ep_set_ffd(&ffd, file, fd); - for (rbp = ep->rbr.rb_node; rbp; ) { + for (rbp = ep->rbr.rb_root.rb_node; rbp; ) { epi = rb_entry(rbp, struct epitem, rbn); kcmp = ep_cmp_ffd(&ffd, &epi->ffd); if (kcmp > 0) @@ -1088,7 +1088,7 @@ static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long t struct rb_node *rbp; struct epitem *epi; - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (epi->ffd.fd == tfd) { if (toff == 0) @@ -1273,20 +1273,22 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) { int kcmp; - struct rb_node **p = &ep->rbr.rb_node, *parent = NULL; + struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL; struct epitem *epic; + bool leftmost = true; while (*p) { parent = *p; epic = rb_entry(parent, struct epitem, rbn); kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd); - if (kcmp > 0) + if (kcmp > 0) { p = &parent->rb_right; - else + leftmost = false; + } else p = &parent->rb_left; } rb_link_node(&epi->rbn, parent, p); - rb_insert_color(&epi->rbn, &ep->rbr); + rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost); } @@ -1530,7 +1532,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, list_del_rcu(&epi->fllink); spin_unlock(&tfile->f_lock); - rb_erase(&epi->rbn, &ep->rbr); + rb_erase_cached(&epi->rbn, &ep->rbr); error_unregister: ep_unregister_pollwait(ep, epi); @@ -1878,7 +1880,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) mutex_lock_nested(&ep->mtx, call_nests + 1); ep->visited = 1; list_add(&ep->visited_list_link, &visited_list); - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) { + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { ep_tovisit = epi->ffd.file->private_data; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a791aac4c5af154051becb0b183c448acaf96c2d..fb96bb71da003046bf00b9698aaa7da7046d0530 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2253,7 +2253,10 @@ int f2fs_migrate_page(struct address_space *mapping, SetPagePrivate(newpage); set_page_private(newpage, page_private(page)); - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6a7152d0c2503944d5e870ca8d3b78eb159b1003..02c066663a3a8329500836933383cc45bcd2e372 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include "fat.h" static inline unsigned long vfat_d_version(struct dentry *dentry) @@ -510,10 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, struct nls_table *nls) { const unsigned char *ip; - unsigned char nc; unsigned char *op; - unsigned int ec; - int i, k, fill; + int i, fill; int charlen; if (utf8) { @@ -530,33 +530,22 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname, i < len && *outlen < FAT_LFN_LEN; *outlen += 1) { if (escape && (*ip == ':')) { + u8 uc[2]; + if (i > len - 5) return -EINVAL; - ec = 0; - for (k = 1; k < 5; k++) { - nc = ip[k]; - ec <<= 4; - if (nc >= '0' && nc <= '9') { - ec |= nc - '0'; - continue; - } - if (nc >= 'a' && nc <= 'f') { - ec |= nc - ('a' - 10); - continue; - } - if (nc >= 'A' && nc <= 'F') { - ec |= nc - ('A' - 10); - continue; - } + + if (hex2bin(uc, ip + 1, 2) < 0) return -EINVAL; - } - *op++ = ec & 0xFF; - *op++ = ec >> 8; + + *(wchar_t *)op = uc[0] << 8 | uc[1]; + + op += 2; ip += 5; i += 5; } else { charlen = nls->char2uni(ip, len - i, - (wchar_t *)op); + (wchar_t *)op); if (charlen < 0) return -EINVAL; ip += charlen; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7c02b3f738e15b9942198350290f68b1796074b4..59073e9f01a410f29150a3abf0087ac28409ad5b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -334,7 +334,7 @@ static void remove_huge_page(struct page *page) } static void -hugetlb_vmdelete_list(struct rb_root *root, pgoff_t start, pgoff_t end) +hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end) { struct vm_area_struct *vma; @@ -498,7 +498,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) i_size_write(inode, offset); i_mmap_lock_write(mapping); - if (!RB_EMPTY_ROOT(&mapping->i_mmap)) + if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0); i_mmap_unlock_write(mapping); remove_inode_hugepages(inode, offset, LLONG_MAX); @@ -523,7 +523,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) inode_lock(inode); i_mmap_lock_write(mapping); - if (!RB_EMPTY_ROOT(&mapping->i_mmap)) + if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, hole_start >> PAGE_SHIFT, hole_end >> PAGE_SHIFT); @@ -830,7 +830,10 @@ static int hugetlbfs_migrate_page(struct address_space *mapping, rc = migrate_huge_page_move_mapping(mapping, newpage, page); if (rc != MIGRATEPAGE_SUCCESS) return rc; - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } diff --git a/fs/inode.c b/fs/inode.c index 6a1626e0edafc572d1428279b0e741945b1da5fe..210054157a4998b901c366e5d27a211b39f23315 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -353,7 +353,7 @@ void address_space_init_once(struct address_space *mapping) init_rwsem(&mapping->i_mmap_rwsem); INIT_LIST_HEAD(&mapping->private_list); spin_lock_init(&mapping->private_lock); - mapping->i_mmap = RB_ROOT; + mapping->i_mmap = RB_ROOT_CACHED; } EXPORT_SYMBOL(address_space_init_once); diff --git a/fs/namei.c b/fs/namei.c index ddb6a7c2b3d4aee1c3512e064350822533968dc5..1180f9c58093f723781ce981d8a2f2ef40a3c461 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1129,9 +1129,18 @@ static int follow_automount(struct path *path, struct nameidata *nd, * of the daemon to instantiate them before they can be used. */ if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && - path->dentry->d_inode) - return -EISDIR; + LOOKUP_OPEN | LOOKUP_CREATE | + LOOKUP_AUTOMOUNT))) { + /* Positive dentry that isn't meant to trigger an + * automount, EISDIR will allow it to be used, + * otherwise there's no mount here "now" so return + * ENOENT. + */ + if (path->dentry->d_inode) + return -EISDIR; + else + return -ENOENT; + } if (path->dentry->d_sb->s_user_ns != &init_user_ns) return -EACCES; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index e3cda0b5968fa32ddceee4bf5388e8ec6d772937..793a6757466860e6b8c34f2559f8186c0756239e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -40,8 +40,8 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) { - return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, - subdir_node); + return rb_entry_safe(rb_first_cached(&dir->subdir), + struct proc_dir_entry, subdir_node); } static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) @@ -54,7 +54,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, const char *name, unsigned int len) { - struct rb_node *node = dir->subdir.rb_node; + struct rb_node *node = dir->subdir.rb_root.rb_node; while (node) { struct proc_dir_entry *de = rb_entry(node, @@ -75,8 +75,9 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, static bool pde_subdir_insert(struct proc_dir_entry *dir, struct proc_dir_entry *de) { - struct rb_root *root = &dir->subdir; - struct rb_node **new = &root->rb_node, *parent = NULL; + struct rb_root_cached *root = &dir->subdir; + struct rb_node **new = &root->rb_root.rb_node, *parent = NULL; + bool leftmost = true; /* Figure out where to put new node */ while (*new) { @@ -88,15 +89,16 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, parent = *new; if (result < 0) new = &(*new)->rb_left; - else if (result > 0) + else if (result > 0) { new = &(*new)->rb_right; - else + leftmost = false; + } else return false; } /* Add new node and rebalance tree. */ rb_link_node(&de->subdir_node, parent, new); - rb_insert_color(&de->subdir_node, root); + rb_insert_color_cached(&de->subdir_node, root, leftmost); return true; } @@ -369,7 +371,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ent->namelen = qstr.len; ent->mode = mode; ent->nlink = nlink; - ent->subdir = RB_ROOT; + ent->subdir = RB_ROOT_CACHED; atomic_set(&ent->count, 1); spin_lock_init(&ent->pde_unload_lock); INIT_LIST_HEAD(&ent->pde_openers); @@ -499,6 +501,14 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, } EXPORT_SYMBOL(proc_create_data); +struct proc_dir_entry *proc_create(const char *name, umode_t mode, + struct proc_dir_entry *parent, + const struct file_operations *proc_fops) +{ + return proc_create_data(name, mode, parent, proc_fops, NULL); +} +EXPORT_SYMBOL(proc_create); + void proc_set_size(struct proc_dir_entry *de, loff_t size) { de->size = size; @@ -545,7 +555,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) de = pde_subdir_find(parent, fn, len); if (de) - rb_erase(&de->subdir_node, &parent->subdir); + rb_erase_cached(&de->subdir_node, &parent->subdir); write_unlock(&proc_subdir_lock); if (!de) { WARN(1, "name '%s'\n", name); @@ -582,13 +592,13 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) write_unlock(&proc_subdir_lock); return -ENOENT; } - rb_erase(&root->subdir_node, &parent->subdir); + rb_erase_cached(&root->subdir_node, &parent->subdir); de = root; while (1) { next = pde_subdir_first(de); if (next) { - rb_erase(&next->subdir_node, &de->subdir); + rb_erase_cached(&next->subdir_node, &de->subdir); de = next; continue; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 2cbfcd32e884478c9806c64986874bf49bd8ed8e..a34195e92b206c9f2cd44ee4a627f53efedc3fd8 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -40,7 +40,7 @@ struct proc_dir_entry { const struct inode_operations *proc_iops; const struct file_operations *proc_fops; struct proc_dir_entry *parent; - struct rb_root subdir; + struct rb_root_cached subdir; struct rb_node subdir_node; void *data; atomic_t count; /* use count */ diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index d72fc40241d9c6897df7656a95402d0a39511bfd..a2bf369c923dcf1c3b788fcd16320bbc425fc1fd 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -196,7 +196,7 @@ static __net_init int proc_net_ns_init(struct net *net) if (!netd) goto out; - netd->subdir = RB_ROOT; + netd->subdir = RB_ROOT_CACHED; netd->data = net; netd->nlink = 2; netd->namelen = 3; diff --git a/fs/proc/root.c b/fs/proc/root.c index deecb397daa30d769f6750f11dcc7cbbfec805fb..926fb27f4ca26cb27c17c2a3d9eed9b9e32fc639 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -210,7 +210,7 @@ struct proc_dir_entry proc_root = { .proc_iops = &proc_root_inode_operations, .proc_fops = &proc_root_operations, .parent = &proc_root, - .subdir = RB_ROOT, + .subdir = RB_ROOT_CACHED, .name = "/proc", }; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a290966f91eccf57dd82d22941026490b99e3d7c..7b40e11ede9bbf718e31e23819bf36cfd1093b76 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -268,8 +268,7 @@ static int do_maps_open(struct inode *inode, struct file *file, * Indicate if the VMA is a stack for the given task; for * /proc/PID/maps that is the stack of the main task. */ -static int is_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma) +static int is_stack(struct vm_area_struct *vma) { /* * We make no effort to guess what a given thread considers to be @@ -302,7 +301,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) { struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; - struct proc_maps_private *priv = m->private; vm_flags_t flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; @@ -350,7 +348,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - if (is_stack(priv, vma)) + if (is_stack(vma)) name = "[stack]"; } @@ -549,6 +547,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } } else if (is_migration_entry(swpent)) page = migration_entry_to_page(swpent); + else if (is_device_private_entry(swpent)) + page = device_private_entry_to_page(swpent); } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap && pte_none(*pte))) { page = find_get_entry(vma->vm_file->f_mapping, @@ -608,13 +608,14 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { - smaps_pmd_entry(pmd, addr, walk); + if (pmd_present(*pmd)) + smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); - return 0; + goto out; } if (pmd_trans_unstable(pmd)) - return 0; + goto out; /* * The mmap_sem held all the way back in m_start() is what * keeps khugepaged out of here and from collapsing things @@ -624,6 +625,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, for (; addr != end; pte++, addr += PAGE_SIZE) smaps_pte_entry(pte, addr, walk); pte_unmap_unlock(pte - 1, ptl); +out: cond_resched(); return 0; } @@ -712,6 +714,8 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, if (is_migration_entry(swpent)) page = migration_entry_to_page(swpent); + else if (is_device_private_entry(swpent)) + page = device_private_entry_to_page(swpent); } if (page) { int mapcount = page_mapcount(page); @@ -977,17 +981,22 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, { pmd_t pmd = *pmdp; - /* See comment in change_huge_pmd() */ - pmdp_invalidate(vma, addr, pmdp); - if (pmd_dirty(*pmdp)) - pmd = pmd_mkdirty(pmd); - if (pmd_young(*pmdp)) - pmd = pmd_mkyoung(pmd); - - pmd = pmd_wrprotect(pmd); - pmd = pmd_clear_soft_dirty(pmd); - - set_pmd_at(vma->vm_mm, addr, pmdp, pmd); + if (pmd_present(pmd)) { + /* See comment in change_huge_pmd() */ + pmdp_invalidate(vma, addr, pmdp); + if (pmd_dirty(*pmdp)) + pmd = pmd_mkdirty(pmd); + if (pmd_young(*pmdp)) + pmd = pmd_mkyoung(pmd); + + pmd = pmd_wrprotect(pmd); + pmd = pmd_clear_soft_dirty(pmd); + + set_pmd_at(vma->vm_mm, addr, pmdp, pmd); + } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { + pmd = pmd_swp_clear_soft_dirty(pmd); + set_pmd_at(vma->vm_mm, addr, pmdp, pmd); + } } #else static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, @@ -1012,6 +1021,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, goto out; } + if (!pmd_present(*pmd)) + goto out; + page = pmd_page(*pmd); /* Clear accessed and referenced bits. */ @@ -1254,7 +1266,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (pm->show_pfn) frame = pte_pfn(pte); flags |= PM_PRESENT; - page = vm_normal_page(vma, addr, pte); + page = _vm_normal_page(vma, addr, pte, true); if (pte_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; } else if (is_swap_pte(pte)) { @@ -1267,6 +1279,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, flags |= PM_SWAP; if (is_migration_entry(entry)) page = migration_entry_to_page(entry); + + if (is_device_private_entry(entry)) + page = device_private_entry_to_page(entry); } if (page && !PageAnon(page)) @@ -1293,27 +1308,33 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (ptl) { u64 flags = 0, frame = 0; pmd_t pmd = *pmdp; + struct page *page = NULL; if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; - /* - * Currently pmd for thp is always present because thp - * can not be swapped-out, migrated, or HWPOISONed - * (split in such cases instead.) - * This if-check is just to prepare for future implementation. - */ if (pmd_present(pmd)) { - struct page *page = pmd_page(pmd); - - if (page_mapcount(page) == 1) - flags |= PM_MMAP_EXCLUSIVE; + page = pmd_page(pmd); flags |= PM_PRESENT; if (pm->show_pfn) frame = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); } +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION + else if (is_swap_pmd(pmd)) { + swp_entry_t entry = pmd_to_swp_entry(pmd); + + frame = swp_type(entry) | + (swp_offset(entry) << MAX_SWAPFILES_SHIFT); + flags |= PM_SWAP; + VM_BUG_ON(!is_pmd_migration_entry(pmd)); + page = migration_entry_to_page(entry); + } +#endif + + if (page && page_mapcount(page) == 1) + flags |= PM_MMAP_EXCLUSIVE; for (; addr != end; addr += PAGE_SIZE) { pagemap_entry_t pme = make_pme(frame, flags); @@ -1746,7 +1767,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_file_path(m, file, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); - } else if (is_stack(proc_priv, vma)) { + } else if (is_stack(vma)) { seq_puts(m, " stack"); } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 23266694db117155f6e23284fecbf69800cecb09..dea90b566a6e9e8326a4f88a79312e8cd10410e6 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -125,8 +125,7 @@ unsigned long task_statm(struct mm_struct *mm, return size; } -static int is_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma) +static int is_stack(struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; @@ -178,7 +177,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, if (file) { seq_pad(m, ' '); seq_file_path(m, file, ""); - } else if (mm && is_stack(priv, vma)) { + } else if (mm && is_stack(vma)) { seq_pad(m, ' '); seq_printf(m, "[stack]"); } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index f90a466ea5db30bc1fcc05a3999d11eb2898727c..a02aa59d1e245124dcead693b15cd128fa2b1f4e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1490,7 +1490,10 @@ static int ubifs_migrate_page(struct address_space *mapping, SetPagePrivate(newpage); } - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } #endif diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 5419e7da82ba5a2729f65192efa60fd38b5e1bd6..ef4b48d1ea4270f59f46e29ae898a48cdb0c8aa6 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -381,8 +381,26 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * in __get_user_pages if userfaultfd_release waits on the * caller of handle_userfault to release the mmap_sem. */ - if (unlikely(ACCESS_ONCE(ctx->released))) + if (unlikely(ACCESS_ONCE(ctx->released))) { + /* + * Don't return VM_FAULT_SIGBUS in this case, so a non + * cooperative manager can close the uffd after the + * last UFFDIO_COPY, without risking to trigger an + * involuntary SIGBUS if the process was starting the + * userfaultfd while the userfaultfd was still armed + * (but after the last UFFDIO_COPY). If the uffd + * wasn't already closed when the userfault reached + * this point, that would normally be solved by + * userfaultfd_must_wait returning 'false'. + * + * If we were to return VM_FAULT_SIGBUS here, the non + * cooperative manager would be instead forced to + * always call UFFDIO_UNREGISTER before it can safely + * close the uffd. + */ + ret = VM_FAULT_NOPAGE; goto out; + } /* * Check that we can return VM_FAULT_RETRY. diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 4d7bb98f41340f52881f78a4d8e4b9dc2f21600f..8e02430365640e2e026c1a244eef70dadab6e080 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -630,7 +630,24 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #define arch_start_context_switch(prev) do {} while (0) #endif -#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pmd; +} + +static inline int pmd_swp_soft_dirty(pmd_t pmd) +{ + return 0; +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pmd; +} +#endif +#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */ static inline int pte_soft_dirty(pte_t pte) { return 0; @@ -675,6 +692,21 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) { return pte; } + +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pmd; +} + +static inline int pmd_swp_soft_dirty(pmd_t pmd) +{ + return 0; +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pmd; +} #endif #ifndef __HAVE_PFNMAP_TRACKING @@ -846,7 +878,23 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) #ifdef CONFIG_TRANSPARENT_HUGEPAGE barrier(); #endif - if (pmd_none(pmdval) || pmd_trans_huge(pmdval)) + /* + * !pmd_present() checks for pmd migration entries + * + * The complete check uses is_pmd_migration_entry() in linux/swapops.h + * But using that requires moving current function and pmd_trans_unstable() + * to linux/swapops.h to resovle dependency, which is too much code move. + * + * !pmd_present() is equivalent to is_pmd_migration_entry() currently, + * because !pmd_present() pages can only be under migration not swapped + * out. + * + * pmd_none() is preseved for future condition checks on pmd migration + * entries and not confusing with this function name, although it is + * redundant with !pmd_present(). + */ + if (pmd_none(pmdval) || pmd_trans_huge(pmdval) || + (IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval))) return 1; if (unlikely(pmd_bad(pmdval))) { pmd_clear_bad(pmd); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 49b292e98fecf1da98e38c005eaf3137478626ef..8d10fc97801cee91bdb35682eee1e7759e4177b5 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -172,7 +172,7 @@ struct drm_mm { * according to the (increasing) start address of the memory node. */ struct drm_mm_node head_node; /* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */ - struct rb_root interval_tree; + struct rb_root_cached interval_tree; struct rb_root holes_size; struct rb_root holes_addr; diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5797ca6fdfe2e658bbfeb225e05b0b8cbe45d40d..700cf5f67118504d85def31a2be37c7c7b117ce3 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -360,6 +360,38 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen, return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); } +/* + * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. + * + * Linux bitmaps are internally arrays of unsigned longs, i.e. 32-bit + * integers in 32-bit environment, and 64-bit integers in 64-bit one. + * + * There are four combinations of endianness and length of the word in linux + * ABIs: LE64, BE64, LE32 and BE32. + * + * On 64-bit kernels 64-bit LE and BE numbers are naturally ordered in + * bitmaps and therefore don't require any special handling. + * + * On 32-bit kernels 32-bit LE ABI orders lo word of 64-bit number in memory + * prior to hi, and 32-bit BE orders hi word prior to lo. The bitmap on the + * other hand is represented as an array of 32-bit words and the position of + * bit N may therefore be calculated as: word #(N/32) and bit #(N%32) in that + * word. For example, bit #42 is located at 10th position of 2nd word. + * It matches 32-bit LE ABI, and we can simply let the compiler store 64-bit + * values in memory as it usually does. But for BE we need to swap hi and lo + * words manually. + * + * With all that, the macro BITMAP_FROM_U64() does explicit reordering of hi and + * lo parts of u64. For LE32 it does nothing, and for BE environment it swaps + * hi and lo words, as is expected by bitmap. + */ +#if __BITS_PER_LONG == 64 +#define BITMAP_FROM_U64(n) (n) +#else +#define BITMAP_FROM_U64(n) ((unsigned long) ((u64)(n) & ULONG_MAX)), \ + ((unsigned long) ((u64)(n) >> 32)) +#endif + /* * bitmap_from_u64 - Check and swap words within u64. * @mask: source bitmap diff --git a/include/linux/bitops.h b/include/linux/bitops.h index a83c822c35c2424095b512dd04f95609cadfdaa0..8fbe259b197c987a56190328997aa8ded227c067 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -19,10 +19,11 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #define GENMASK(h, l) \ - (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) #define GENMASK_ULL(h, l) \ - (((~0ULL) << (l)) & (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) + (((~0ULL) - (1ULL << (l)) + 1) & \ + (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); diff --git a/include/linux/byteorder/big_endian.h b/include/linux/byteorder/big_endian.h index 392041475c720148c4332b38ea81cd3d1460f425..ffd215988392d96db0739118382bc3e4257efb29 100644 --- a/include/linux/byteorder/big_endian.h +++ b/include/linux/byteorder/big_endian.h @@ -3,5 +3,9 @@ #include +#ifndef CONFIG_CPU_BIG_ENDIAN +#warning inconsistent configuration, needs CONFIG_CPU_BIG_ENDIAN +#endif + #include #endif /* _LINUX_BYTEORDER_BIG_ENDIAN_H */ diff --git a/include/linux/byteorder/little_endian.h b/include/linux/byteorder/little_endian.h index 08057377aa23299d59c281ff09030c091392f844..ba910bb9aad0c75b83be099bf98b844505f72688 100644 --- a/include/linux/byteorder/little_endian.h +++ b/include/linux/byteorder/little_endian.h @@ -3,5 +3,9 @@ #include +#ifdef CONFIG_CPU_BIG_ENDIAN +#warning inconsistent configuration, CONFIG_CPU_BIG_ENDIAN is set +#endif + #include #endif /* _LINUX_BYTEORDER_LITTLE_ENDIAN_H */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 4bf4479a3a800c435ccec039f7e5da4089910f87..cd415b733c2a0595ca8c58c22f23ffb5b5126bc4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -32,15 +32,15 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; #define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) #if NR_CPUS == 1 -#define nr_cpu_ids 1 +#define nr_cpu_ids 1U #else -extern int nr_cpu_ids; +extern unsigned int nr_cpu_ids; #endif #ifdef CONFIG_CPUMASK_OFFSTACK /* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, * not all bits may be allocated. */ -#define nr_cpumask_bits ((unsigned int)nr_cpu_ids) +#define nr_cpumask_bits nr_cpu_ids #else #define nr_cpumask_bits ((unsigned int)NR_CPUS) #endif @@ -178,20 +178,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } -/** - * cpumask_next - get the next cpu in a cpumask - * @n: the cpu prior to the place to search (ie. return will be > @n) - * @srcp: the cpumask pointer - * - * Returns >= nr_cpu_ids if no further cpus set. - */ -static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) -{ - /* -1 is a legal arg here. */ - if (n != -1) - cpumask_check(n); - return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); -} +unsigned int cpumask_next(int n, const struct cpumask *srcp); /** * cpumask_next_zero - get the next unset cpu in a cpumask diff --git a/include/linux/fs.h b/include/linux/fs.h index 509434aaf5a46f11479db99c7654992f58dbede4..2d0e6748e46e1f6707b0de2b127cf5ee59553c3c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -392,7 +392,7 @@ struct address_space { struct radix_tree_root page_tree; /* radix tree of all pages */ spinlock_t tree_lock; /* and lock protecting it */ atomic_t i_mmap_writable;/* count VM_SHARED mappings */ - struct rb_root i_mmap; /* tree of private and shared mappings */ + struct rb_root_cached i_mmap; /* tree of private and shared mappings */ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ @@ -487,7 +487,7 @@ static inline void i_mmap_unlock_read(struct address_space *mapping) */ static inline int mapping_mapped(struct address_space *mapping) { - return !RB_EMPTY_ROOT(&mapping->i_mmap); + return !RB_EMPTY_ROOT(&mapping->i_mmap.rb_root); } /* @@ -3043,8 +3043,7 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat) static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, - stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { diff --git a/include/linux/hmm.h b/include/linux/hmm.h new file mode 100644 index 0000000000000000000000000000000000000000..96e69979f84d7b7fe1edb6568097bf32cec97a92 --- /dev/null +++ b/include/linux/hmm.h @@ -0,0 +1,520 @@ +/* + * Copyright 2013 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: Jérôme Glisse + */ +/* + * Heterogeneous Memory Management (HMM) + * + * See Documentation/vm/hmm.txt for reasons and overview of what HMM is and it + * is for. Here we focus on the HMM API description, with some explanation of + * the underlying implementation. + * + * Short description: HMM provides a set of helpers to share a virtual address + * space between CPU and a device, so that the device can access any valid + * address of the process (while still obeying memory protection). HMM also + * provides helpers to migrate process memory to device memory, and back. Each + * set of functionality (address space mirroring, and migration to and from + * device memory) can be used independently of the other. + * + * + * HMM address space mirroring API: + * + * Use HMM address space mirroring if you want to mirror range of the CPU page + * table of a process into a device page table. Here, "mirror" means "keep + * synchronized". Prerequisites: the device must provide the ability to write- + * protect its page tables (at PAGE_SIZE granularity), and must be able to + * recover from the resulting potential page faults. + * + * HMM guarantees that at any point in time, a given virtual address points to + * either the same memory in both CPU and device page tables (that is: CPU and + * device page tables each point to the same pages), or that one page table (CPU + * or device) points to no entry, while the other still points to the old page + * for the address. The latter case happens when the CPU page table update + * happens first, and then the update is mirrored over to the device page table. + * This does not cause any issue, because the CPU page table cannot start + * pointing to a new page until the device page table is invalidated. + * + * HMM uses mmu_notifiers to monitor the CPU page tables, and forwards any + * updates to each device driver that has registered a mirror. It also provides + * some API calls to help with taking a snapshot of the CPU page table, and to + * synchronize with any updates that might happen concurrently. + * + * + * HMM migration to and from device memory: + * + * HMM provides a set of helpers to hotplug device memory as ZONE_DEVICE, with + * a new MEMORY_DEVICE_PRIVATE type. This provides a struct page for each page + * of the device memory, and allows the device driver to manage its memory + * using those struct pages. Having struct pages for device memory makes + * migration easier. Because that memory is not addressable by the CPU it must + * never be pinned to the device; in other words, any CPU page fault can always + * cause the device memory to be migrated (copied/moved) back to regular memory. + * + * A new migrate helper (migrate_vma()) has been added (see mm/migrate.c) that + * allows use of a device DMA engine to perform the copy operation between + * regular system memory and device memory. + */ +#ifndef LINUX_HMM_H +#define LINUX_HMM_H + +#include + +#if IS_ENABLED(CONFIG_HMM) + +#include +#include +#include +#include + +struct hmm; + +/* + * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page + * + * Flags: + * HMM_PFN_VALID: pfn is valid + * HMM_PFN_READ: CPU page table has read permission set + * HMM_PFN_WRITE: CPU page table has write permission set + * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory + * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none() + * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the + * result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not + * be mirrored by a device, because the entry will never have HMM_PFN_VALID + * set and the pfn value is undefined. + * HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE) + */ +typedef unsigned long hmm_pfn_t; + +#define HMM_PFN_VALID (1 << 0) +#define HMM_PFN_READ (1 << 1) +#define HMM_PFN_WRITE (1 << 2) +#define HMM_PFN_ERROR (1 << 3) +#define HMM_PFN_EMPTY (1 << 4) +#define HMM_PFN_SPECIAL (1 << 5) +#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6) +#define HMM_PFN_SHIFT 7 + +/* + * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t + * @pfn: hmm_pfn_t to convert to struct page + * Returns: struct page pointer if pfn is a valid hmm_pfn_t, NULL otherwise + * + * If the hmm_pfn_t is valid (ie valid flag set) then return the struct page + * matching the pfn value stored in the hmm_pfn_t. Otherwise return NULL. + */ +static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn) +{ + if (!(pfn & HMM_PFN_VALID)) + return NULL; + return pfn_to_page(pfn >> HMM_PFN_SHIFT); +} + +/* + * hmm_pfn_t_to_pfn() - return pfn value store in a hmm_pfn_t + * @pfn: hmm_pfn_t to extract pfn from + * Returns: pfn value if hmm_pfn_t is valid, -1UL otherwise + */ +static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn) +{ + if (!(pfn & HMM_PFN_VALID)) + return -1UL; + return (pfn >> HMM_PFN_SHIFT); +} + +/* + * hmm_pfn_t_from_page() - create a valid hmm_pfn_t value from struct page + * @page: struct page pointer for which to create the hmm_pfn_t + * Returns: valid hmm_pfn_t for the page + */ +static inline hmm_pfn_t hmm_pfn_t_from_page(struct page *page) +{ + return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID; +} + +/* + * hmm_pfn_t_from_pfn() - create a valid hmm_pfn_t value from pfn + * @pfn: pfn value for which to create the hmm_pfn_t + * Returns: valid hmm_pfn_t for the pfn + */ +static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn) +{ + return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID; +} + + +#if IS_ENABLED(CONFIG_HMM_MIRROR) +/* + * Mirroring: how to synchronize device page table with CPU page table. + * + * A device driver that is participating in HMM mirroring must always + * synchronize with CPU page table updates. For this, device drivers can either + * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device + * drivers can decide to register one mirror per device per process, or just + * one mirror per process for a group of devices. The pattern is: + * + * int device_bind_address_space(..., struct mm_struct *mm, ...) + * { + * struct device_address_space *das; + * + * // Device driver specific initialization, and allocation of das + * // which contains an hmm_mirror struct as one of its fields. + * ... + * + * ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops); + * if (ret) { + * // Cleanup on error + * return ret; + * } + * + * // Other device driver specific initialization + * ... + * } + * + * Once an hmm_mirror is registered for an address space, the device driver + * will get callbacks through sync_cpu_device_pagetables() operation (see + * hmm_mirror_ops struct). + * + * Device driver must not free the struct containing the hmm_mirror struct + * before calling hmm_mirror_unregister(). The expected usage is to do that when + * the device driver is unbinding from an address space. + * + * + * void device_unbind_address_space(struct device_address_space *das) + * { + * // Device driver specific cleanup + * ... + * + * hmm_mirror_unregister(&das->mirror); + * + * // Other device driver specific cleanup, and now das can be freed + * ... + * } + */ + +struct hmm_mirror; + +/* + * enum hmm_update_type - type of update + * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why) + */ +enum hmm_update_type { + HMM_UPDATE_INVALIDATE, +}; + +/* + * struct hmm_mirror_ops - HMM mirror device operations callback + * + * @update: callback to update range on a device + */ +struct hmm_mirror_ops { + /* sync_cpu_device_pagetables() - synchronize page tables + * + * @mirror: pointer to struct hmm_mirror + * @update_type: type of update that occurred to the CPU page table + * @start: virtual start address of the range to update + * @end: virtual end address of the range to update + * + * This callback ultimately originates from mmu_notifiers when the CPU + * page table is updated. The device driver must update its page table + * in response to this callback. The update argument tells what action + * to perform. + * + * The device driver must not return from this callback until the device + * page tables are completely updated (TLBs flushed, etc); this is a + * synchronous call. + */ + void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror, + enum hmm_update_type update_type, + unsigned long start, + unsigned long end); +}; + +/* + * struct hmm_mirror - mirror struct for a device driver + * + * @hmm: pointer to struct hmm (which is unique per mm_struct) + * @ops: device driver callback for HMM mirror operations + * @list: for list of mirrors of a given mm + * + * Each address space (mm_struct) being mirrored by a device must register one + * instance of an hmm_mirror struct with HMM. HMM will track the list of all + * mirrors for each mm_struct. + */ +struct hmm_mirror { + struct hmm *hmm; + const struct hmm_mirror_ops *ops; + struct list_head list; +}; + +int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm); +void hmm_mirror_unregister(struct hmm_mirror *mirror); + + +/* + * struct hmm_range - track invalidation lock on virtual address range + * + * @list: all range lock are on a list + * @start: range virtual start address (inclusive) + * @end: range virtual end address (exclusive) + * @pfns: array of pfns (big enough for the range) + * @valid: pfns array did not change since it has been fill by an HMM function + */ +struct hmm_range { + struct list_head list; + unsigned long start; + unsigned long end; + hmm_pfn_t *pfns; + bool valid; +}; + +/* + * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device + * driver lock that serializes device page table updates, then call + * hmm_vma_range_done(), to check if the snapshot is still valid. The same + * device driver page table update lock must also be used in the + * hmm_mirror_ops.sync_cpu_device_pagetables() callback, so that CPU page + * table invalidation serializes on it. + * + * YOU MUST CALL hmm_vma_range_done() ONCE AND ONLY ONCE EACH TIME YOU CALL + * hmm_vma_get_pfns() WITHOUT ERROR ! + * + * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID ! + */ +int hmm_vma_get_pfns(struct vm_area_struct *vma, + struct hmm_range *range, + unsigned long start, + unsigned long end, + hmm_pfn_t *pfns); +bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range); + + +/* + * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will + * not migrate any device memory back to system memory. The hmm_pfn_t array will + * be updated with the fault result and current snapshot of the CPU page table + * for the range. + * + * The mmap_sem must be taken in read mode before entering and it might be + * dropped by the function if the block argument is false. In that case, the + * function returns -EAGAIN. + * + * Return value does not reflect if the fault was successful for every single + * address or not. Therefore, the caller must to inspect the hmm_pfn_t array to + * determine fault status for each address. + * + * Trying to fault inside an invalid vma will result in -EINVAL. + * + * See the function description in mm/hmm.c for further documentation. + */ +int hmm_vma_fault(struct vm_area_struct *vma, + struct hmm_range *range, + unsigned long start, + unsigned long end, + hmm_pfn_t *pfns, + bool write, + bool block); +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ + + +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) +struct hmm_devmem; + +struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma, + unsigned long addr); + +/* + * struct hmm_devmem_ops - callback for ZONE_DEVICE memory events + * + * @free: call when refcount on page reach 1 and thus is no longer use + * @fault: call when there is a page fault to unaddressable memory + * + * Both callback happens from page_free() and page_fault() callback of struct + * dev_pagemap respectively. See include/linux/memremap.h for more details on + * those. + * + * The hmm_devmem_ops callback are just here to provide a coherent and + * uniq API to device driver and device driver should not register their + * own page_free() or page_fault() but rely on the hmm_devmem_ops call- + * back. + */ +struct hmm_devmem_ops { + /* + * free() - free a device page + * @devmem: device memory structure (see struct hmm_devmem) + * @page: pointer to struct page being freed + * + * Call back occurs whenever a device page refcount reach 1 which + * means that no one is holding any reference on the page anymore + * (ZONE_DEVICE page have an elevated refcount of 1 as default so + * that they are not release to the general page allocator). + * + * Note that callback has exclusive ownership of the page (as no + * one is holding any reference). + */ + void (*free)(struct hmm_devmem *devmem, struct page *page); + /* + * fault() - CPU page fault or get user page (GUP) + * @devmem: device memory structure (see struct hmm_devmem) + * @vma: virtual memory area containing the virtual address + * @addr: virtual address that faulted or for which there is a GUP + * @page: pointer to struct page backing virtual address (unreliable) + * @flags: FAULT_FLAG_* (see include/linux/mm.h) + * @pmdp: page middle directory + * Returns: VM_FAULT_MINOR/MAJOR on success or one of VM_FAULT_ERROR + * on error + * + * The callback occurs whenever there is a CPU page fault or GUP on a + * virtual address. This means that the device driver must migrate the + * page back to regular memory (CPU accessible). + * + * The device driver is free to migrate more than one page from the + * fault() callback as an optimization. However if device decide to + * migrate more than one page it must always priotirize the faulting + * address over the others. + * + * The struct page pointer is only given as an hint to allow quick + * lookup of internal device driver data. A concurrent migration + * might have already free that page and the virtual address might + * not longer be back by it. So it should not be modified by the + * callback. + * + * Note that mmap semaphore is held in read mode at least when this + * callback occurs, hence the vma is valid upon callback entry. + */ + int (*fault)(struct hmm_devmem *devmem, + struct vm_area_struct *vma, + unsigned long addr, + const struct page *page, + unsigned int flags, + pmd_t *pmdp); +}; + +/* + * struct hmm_devmem - track device memory + * + * @completion: completion object for device memory + * @pfn_first: first pfn for this resource (set by hmm_devmem_add()) + * @pfn_last: last pfn for this resource (set by hmm_devmem_add()) + * @resource: IO resource reserved for this chunk of memory + * @pagemap: device page map for that chunk + * @device: device to bind resource to + * @ops: memory operations callback + * @ref: per CPU refcount + * + * This an helper structure for device drivers that do not wish to implement + * the gory details related to hotplugging new memoy and allocating struct + * pages. + * + * Device drivers can directly use ZONE_DEVICE memory on their own if they + * wish to do so. + */ +struct hmm_devmem { + struct completion completion; + unsigned long pfn_first; + unsigned long pfn_last; + struct resource *resource; + struct device *device; + struct dev_pagemap pagemap; + const struct hmm_devmem_ops *ops; + struct percpu_ref ref; +}; + +/* + * To add (hotplug) device memory, HMM assumes that there is no real resource + * that reserves a range in the physical address space (this is intended to be + * use by unaddressable device memory). It will reserve a physical range big + * enough and allocate struct page for it. + * + * The device driver can wrap the hmm_devmem struct inside a private device + * driver struct. The device driver must call hmm_devmem_remove() before the + * device goes away and before freeing the hmm_devmem struct memory. + */ +struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, + struct device *device, + unsigned long size); +struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, + struct device *device, + struct resource *res); +void hmm_devmem_remove(struct hmm_devmem *devmem); + +/* + * hmm_devmem_page_set_drvdata - set per-page driver data field + * + * @page: pointer to struct page + * @data: driver data value to set + * + * Because page can not be on lru we have an unsigned long that driver can use + * to store a per page field. This just a simple helper to do that. + */ +static inline void hmm_devmem_page_set_drvdata(struct page *page, + unsigned long data) +{ + unsigned long *drvdata = (unsigned long *)&page->pgmap; + + drvdata[1] = data; +} + +/* + * hmm_devmem_page_get_drvdata - get per page driver data field + * + * @page: pointer to struct page + * Return: driver data value + */ +static inline unsigned long hmm_devmem_page_get_drvdata(struct page *page) +{ + unsigned long *drvdata = (unsigned long *)&page->pgmap; + + return drvdata[1]; +} + + +/* + * struct hmm_device - fake device to hang device memory onto + * + * @device: device struct + * @minor: device minor number + */ +struct hmm_device { + struct device device; + unsigned int minor; +}; + +/* + * A device driver that wants to handle multiple devices memory through a + * single fake device can use hmm_device to do so. This is purely a helper and + * it is not strictly needed, in order to make use of any HMM functionality. + */ +struct hmm_device *hmm_device_new(void *drvdata); +void hmm_device_put(struct hmm_device *hmm_device); +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ +#endif /* IS_ENABLED(CONFIG_HMM) */ + +/* Below are for HMM internal use only! Not to be used by device driver! */ +#if IS_ENABLED(CONFIG_HMM_MIRROR) +void hmm_mm_destroy(struct mm_struct *mm); + +static inline void hmm_mm_init(struct mm_struct *mm) +{ + mm->hmm = NULL; +} +#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */ +static inline void hmm_mm_destroy(struct mm_struct *mm) {} +static inline void hmm_mm_init(struct mm_struct *mm) {} +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ + + +#else /* IS_ENABLED(CONFIG_HMM) */ +static inline void hmm_mm_destroy(struct mm_struct *mm) {} +static inline void hmm_mm_init(struct mm_struct *mm) {} +#endif /* LINUX_HMM_H */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ee696347f928ffc7b0aac51e3c6e1180c11265c9..14bc21c2ee7ff8d9c7956b9c317d4a39b36cc0c0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -147,7 +147,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ - if (pmd_trans_huge(*____pmd) \ + if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd) \ || pmd_devmap(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ false, NULL); \ @@ -178,12 +178,18 @@ extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); extern spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); + +static inline int is_swap_pmd(pmd_t pmd) +{ + return !pmd_none(pmd) && !pmd_present(pmd); +} + /* mmap_sem must be held on entry */ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); - if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) + if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else return NULL; @@ -233,6 +239,11 @@ void mm_put_huge_zero_page(struct mm_struct *mm); #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot)) +static inline bool thp_migration_supported(void) +{ + return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); +} + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) @@ -294,6 +305,10 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, long adjust_next) { } +static inline int is_swap_pmd(pmd_t pmd) +{ + return 0; +} static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { @@ -336,6 +351,11 @@ static inline struct page *follow_devmap_pud(struct vm_area_struct *vma, { return NULL; } + +static inline bool thp_migration_supported(void) +{ + return false; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 0e849715e5be22dc5733624148085e3afff65357..3c07ace5b431094f3603438f9c27550007012101 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -175,9 +175,8 @@ extern struct cred init_cred; #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ - .pi_waiters = RB_ROOT, \ - .pi_top_task = NULL, \ - .pi_waiters_leftmost = NULL, + .pi_waiters = RB_ROOT_CACHED, \ + .pi_top_task = NULL, #else # define INIT_RT_MUTEXES(tsk) #endif diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h index 724556aa3c957f66f39f2487149c2242471a9f1f..202ee1283f4bd59d984a1066bda5881380a4268e 100644 --- a/include/linux/interval_tree.h +++ b/include/linux/interval_tree.h @@ -11,13 +11,15 @@ struct interval_tree_node { }; extern void -interval_tree_insert(struct interval_tree_node *node, struct rb_root *root); +interval_tree_insert(struct interval_tree_node *node, + struct rb_root_cached *root); extern void -interval_tree_remove(struct interval_tree_node *node, struct rb_root *root); +interval_tree_remove(struct interval_tree_node *node, + struct rb_root_cached *root); extern struct interval_tree_node * -interval_tree_iter_first(struct rb_root *root, +interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); extern struct interval_tree_node * diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h index 58370e1862ad6bbddb8ea3e452ad65a5404b9220..1f97ce26cccc1297caa110b9859b4c3ee8efcb9c 100644 --- a/include/linux/interval_tree_generic.h +++ b/include/linux/interval_tree_generic.h @@ -33,7 +33,7 @@ * ITSTATIC: 'static' or empty * ITPREFIX: prefix to use for the inline tree definitions * - * Note - before using this, please consider if non-generic version + * Note - before using this, please consider if generic version * (interval_tree.h) would work for you... */ @@ -65,11 +65,13 @@ RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB, \ \ /* Insert / remove interval nodes from the tree */ \ \ -ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ +ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, \ + struct rb_root_cached *root) \ { \ - struct rb_node **link = &root->rb_node, *rb_parent = NULL; \ + struct rb_node **link = &root->rb_root.rb_node, *rb_parent = NULL; \ ITTYPE start = ITSTART(node), last = ITLAST(node); \ ITSTRUCT *parent; \ + bool leftmost = true; \ \ while (*link) { \ rb_parent = *link; \ @@ -78,18 +80,22 @@ ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node, struct rb_root *root) \ parent->ITSUBTREE = last; \ if (start < ITSTART(parent)) \ link = &parent->ITRB.rb_left; \ - else \ + else { \ link = &parent->ITRB.rb_right; \ + leftmost = false; \ + } \ } \ \ node->ITSUBTREE = last; \ rb_link_node(&node->ITRB, rb_parent, link); \ - rb_insert_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ + rb_insert_augmented_cached(&node->ITRB, root, \ + leftmost, &ITPREFIX ## _augment); \ } \ \ -ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, struct rb_root *root) \ +ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \ + struct rb_root_cached *root) \ { \ - rb_erase_augmented(&node->ITRB, root, &ITPREFIX ## _augment); \ + rb_erase_augmented_cached(&node->ITRB, root, &ITPREFIX ## _augment); \ } \ \ /* \ @@ -140,15 +146,35 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \ } \ \ ITSTATIC ITSTRUCT * \ -ITPREFIX ## _iter_first(struct rb_root *root, ITTYPE start, ITTYPE last) \ +ITPREFIX ## _iter_first(struct rb_root_cached *root, \ + ITTYPE start, ITTYPE last) \ { \ - ITSTRUCT *node; \ + ITSTRUCT *node, *leftmost; \ \ - if (!root->rb_node) \ + if (!root->rb_root.rb_node) \ return NULL; \ - node = rb_entry(root->rb_node, ITSTRUCT, ITRB); \ + \ + /* \ + * Fastpath range intersection/overlap between A: [a0, a1] and \ + * B: [b0, b1] is given by: \ + * \ + * a0 <= b1 && b0 <= a1 \ + * \ + * ... where A holds the lock range and B holds the smallest \ + * 'start' and largest 'last' in the tree. For the later, we \ + * rely on the root node, which by augmented interval tree \ + * property, holds the largest value in its last-in-subtree. \ + * This allows mitigating some of the tree walk overhead for \ + * for non-intersecting ranges, maintained and consulted in O(1). \ + */ \ + node = rb_entry(root->rb_root.rb_node, ITSTRUCT, ITRB); \ if (node->ITSUBTREE < start) \ return NULL; \ + \ + leftmost = rb_entry(root->rb_leftmost, ITSTRUCT, ITRB); \ + if (ITSTART(leftmost) > last) \ + return NULL; \ + \ return ITPREFIX ## _subtree_search(node, start, last); \ } \ \ diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 6230064d7f95d072d61e2bc0eedd341463880d32..f5cf32e800411ecafd0aef552d7ae675dceefeb7 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -130,6 +130,8 @@ enum { IORES_DESC_ACPI_NV_STORAGE = 3, IORES_DESC_PERSISTENT_MEMORY = 4, IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, + IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, + IORES_DESC_DEVICE_PUBLIC_MEMORY = 7, }; /* helpers to define resources */ diff --git a/include/linux/ipc.h b/include/linux/ipc.h index fadd579d577dc8aafd7c100ea51fe2bf630c76a0..92a2ccff80c5cf287e2447c75dc505b24bcd62d8 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -3,7 +3,9 @@ #include #include +#include #include +#include #define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ @@ -21,8 +23,10 @@ struct kern_ipc_perm { unsigned long seq; void *security; + struct rhash_head khtnode; + struct rcu_head rcu; - atomic_t refcount; + refcount_t refcount; } ____cacheline_aligned_in_smp __randomize_layout; #endif /* _LINUX_IPC_H */ diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 65327ee0936b314fe38fffee3ffb5f882d8883ac..83f0bf7a587d55c5e7f7d46d4e239b36c1b3c289 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -7,19 +7,23 @@ #include #include #include +#include +#include struct user_namespace; struct ipc_ids { int in_use; unsigned short seq; + bool tables_initialized; struct rw_semaphore rwsem; struct idr ipcs_idr; int next_id; + struct rhashtable key_ht; }; struct ipc_namespace { - atomic_t count; + refcount_t count; struct ipc_ids ids[3]; int sem_ctls[4]; @@ -118,7 +122,7 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags, static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) - atomic_inc(&ns->count); + refcount_inc(&ns->count); return ns; } diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6607225d0ea49fc396fc5cdc4fc6903d04663009..0ad4c3044cf9333fa9664c2be0850e9596ab2206 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -78,8 +78,11 @@ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP -#define DIV_ROUND_UP_ULL(ll,d) \ - ({ unsigned long long _tmp = (ll)+(d)-1; do_div(_tmp, d); _tmp; }) + +#define DIV_ROUND_DOWN_ULL(ll, d) \ + ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) + +#define DIV_ROUND_UP_ULL(ll, d) DIV_ROUND_DOWN_ULL((ll) + (d) - 1, (d)) #if BITS_PER_LONG == 32 # define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 655082c88fd93b5057fc641d196aae66d805850b..40c89ad4bea6adf829eeb54f34d2328a577c4b1e 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -19,6 +19,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include #include #include #include @@ -44,63 +45,4 @@ static inline int request_module_nowait(const char *name, ...) { return -ENOSYS; #define try_then_request_module(x, mod...) (x) #endif - -struct cred; -struct file; - -#define UMH_NO_WAIT 0 /* don't wait at all */ -#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */ -#define UMH_WAIT_PROC 2 /* wait for the process to complete */ -#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ - -struct subprocess_info { - struct work_struct work; - struct completion *complete; - const char *path; - char **argv; - char **envp; - int wait; - int retval; - int (*init)(struct subprocess_info *info, struct cred *new); - void (*cleanup)(struct subprocess_info *info); - void *data; -} __randomize_layout; - -extern int -call_usermodehelper(const char *path, char **argv, char **envp, int wait); - -extern struct subprocess_info * -call_usermodehelper_setup(const char *path, char **argv, char **envp, - gfp_t gfp_mask, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *), void *data); - -extern int -call_usermodehelper_exec(struct subprocess_info *info, int wait); - -extern struct ctl_table usermodehelper_table[]; - -enum umh_disable_depth { - UMH_ENABLED = 0, - UMH_FREEZING, - UMH_DISABLED, -}; - -extern int __usermodehelper_disable(enum umh_disable_depth depth); -extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth); - -static inline int usermodehelper_disable(void) -{ - return __usermodehelper_disable(UMH_DISABLED); -} - -static inline void usermodehelper_enable(void) -{ - __usermodehelper_set_disable_depth(UMH_ENABLED); -} - -extern int usermodehelper_read_trylock(void); -extern long usermodehelper_read_lock_wait(long timeout); -extern void usermodehelper_read_unlock(void); - #endif /* __LINUX_KMOD_H__ */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 5e6e4cc36ff4cebf81e0b822063a96dff458412c..0995e1a2b4583138953d395e3d10b389b0cb0609 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -133,6 +133,17 @@ extern int __remove_pages(struct zone *zone, unsigned long start_pfn, extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, bool want_memblock); +#ifndef CONFIG_ARCH_HAS_ADD_PAGES +static inline int add_pages(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool want_memblock) +{ + return __add_pages(nid, start_pfn, nr_pages, want_memblock); +} +#else /* ARCH_HAS_ADD_PAGES */ +int add_pages(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool want_memblock); +#endif /* ARCH_HAS_ADD_PAGES */ + #ifdef CONFIG_NUMA extern int memory_add_physaddr_to_nid(u64 start); #else diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 93416196ba64f6b4747ad9f37aaf5a913ff3ac29..79f8ba7c38940953182d05a59df970d06acc13fc 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -4,6 +4,8 @@ #include #include +#include + struct resource; struct device; @@ -35,24 +37,107 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) } #endif +/* + * Specialize ZONE_DEVICE memory into multiple types each having differents + * usage. + * + * MEMORY_DEVICE_HOST: + * Persistent device memory (pmem): struct page might be allocated in different + * memory and architecture might want to perform special actions. It is similar + * to regular memory, in that the CPU can access it transparently. However, + * it is likely to have different bandwidth and latency than regular memory. + * See Documentation/nvdimm/nvdimm.txt for more information. + * + * MEMORY_DEVICE_PRIVATE: + * Device memory that is not directly addressable by the CPU: CPU can neither + * read nor write private memory. In this case, we do still have struct pages + * backing the device memory. Doing so simplifies the implementation, but it is + * important to remember that there are certain points at which the struct page + * must be treated as an opaque object, rather than a "normal" struct page. + * + * A more complete discussion of unaddressable memory may be found in + * include/linux/hmm.h and Documentation/vm/hmm.txt. + * + * MEMORY_DEVICE_PUBLIC: + * Device memory that is cache coherent from device and CPU point of view. This + * is use on platform that have an advance system bus (like CAPI or CCIX). A + * driver can hotplug the device memory using ZONE_DEVICE and with that memory + * type. Any page of a process can be migrated to such memory. However no one + * should be allow to pin such memory so that it can always be evicted. + */ +enum memory_type { + MEMORY_DEVICE_HOST = 0, + MEMORY_DEVICE_PRIVATE, + MEMORY_DEVICE_PUBLIC, +}; + +/* + * For MEMORY_DEVICE_PRIVATE we use ZONE_DEVICE and extend it with two + * callbacks: + * page_fault() + * page_free() + * + * Additional notes about MEMORY_DEVICE_PRIVATE may be found in + * include/linux/hmm.h and Documentation/vm/hmm.txt. There is also a brief + * explanation in include/linux/memory_hotplug.h. + * + * The page_fault() callback must migrate page back, from device memory to + * system memory, so that the CPU can access it. This might fail for various + * reasons (device issues, device have been unplugged, ...). When such error + * conditions happen, the page_fault() callback must return VM_FAULT_SIGBUS and + * set the CPU page table entry to "poisoned". + * + * Note that because memory cgroup charges are transferred to the device memory, + * this should never fail due to memory restrictions. However, allocation + * of a regular system page might still fail because we are out of memory. If + * that happens, the page_fault() callback must return VM_FAULT_OOM. + * + * The page_fault() callback can also try to migrate back multiple pages in one + * chunk, as an optimization. It must, however, prioritize the faulting address + * over all the others. + * + * + * The page_free() callback is called once the page refcount reaches 1 + * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug. + * This allows the device driver to implement its own memory management.) + * + * For MEMORY_DEVICE_PUBLIC only the page_free() callback matter. + */ +typedef int (*dev_page_fault_t)(struct vm_area_struct *vma, + unsigned long addr, + const struct page *page, + unsigned int flags, + pmd_t *pmdp); +typedef void (*dev_page_free_t)(struct page *page, void *data); + /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings + * @page_fault: callback when CPU fault on an unaddressable device page + * @page_free: free page callback when page refcount reaches 1 * @altmap: pre-allocated/reserved memory for vmemmap allocations * @res: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping * @dev: host device of the mapping for debug + * @data: private data pointer for page_free() + * @type: memory type: see MEMORY_* in memory_hotplug.h */ struct dev_pagemap { + dev_page_fault_t page_fault; + dev_page_free_t page_free; struct vmem_altmap *altmap; const struct resource *res; struct percpu_ref *ref; struct device *dev; + void *data; + enum memory_type type; }; #ifdef CONFIG_ZONE_DEVICE void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap); struct dev_pagemap *find_dev_pagemap(resource_size_t phys); + +static inline bool is_zone_device_page(const struct page *page); #else static inline void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, @@ -73,6 +158,20 @@ static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys) } #endif +#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) +static inline bool is_device_private_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PRIVATE; +} + +static inline bool is_device_public_page(const struct page *page) +{ + return is_zone_device_page(page) && + page->pgmap->type == MEMORY_DEVICE_PUBLIC; +} +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ + /** * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn * @pfn: page frame number to lookup page_map diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3e0d405dc842d5dd65b28cb710b4ce38008ca64b..643c7ae7d7b4d8dfad5fa20faf255334f133176f 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -35,15 +35,28 @@ static inline struct page *new_page_nodemask(struct page *page, int preferred_nid, nodemask_t *nodemask) { gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; + unsigned int order = 0; + struct page *new_page = NULL; if (PageHuge(page)) return alloc_huge_page_nodemask(page_hstate(compound_head(page)), preferred_nid, nodemask); + if (thp_migration_supported() && PageTransHuge(page)) { + order = HPAGE_PMD_ORDER; + gfp_mask |= GFP_TRANSHUGE; + } + if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; - return __alloc_pages_nodemask(gfp_mask, 0, preferred_nid, nodemask); + new_page = __alloc_pages_nodemask(gfp_mask, order, + preferred_nid, nodemask); + + if (new_page && PageTransHuge(page)) + prep_transhuge_page(new_page); + + return new_page; } #ifdef CONFIG_MIGRATION @@ -59,6 +72,7 @@ extern void putback_movable_page(struct page *page); extern int migrate_prep(void); extern int migrate_prep_local(void); +extern void migrate_page_states(struct page *newpage, struct page *page); extern void migrate_page_copy(struct page *newpage, struct page *page); extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); @@ -79,6 +93,10 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) static inline int migrate_prep(void) { return -ENOSYS; } static inline int migrate_prep_local(void) { return -ENOSYS; } +static inline void migrate_page_states(struct page *newpage, struct page *page) +{ +} + static inline void migrate_page_copy(struct page *newpage, struct page *page) {} @@ -138,4 +156,136 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm, } #endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/ + +#ifdef CONFIG_MIGRATION + +/* + * Watch out for PAE architecture, which has an unsigned long, and might not + * have enough bits to store all physical address and flags. So far we have + * enough room for all our flags. + */ +#define MIGRATE_PFN_VALID (1UL << 0) +#define MIGRATE_PFN_MIGRATE (1UL << 1) +#define MIGRATE_PFN_LOCKED (1UL << 2) +#define MIGRATE_PFN_WRITE (1UL << 3) +#define MIGRATE_PFN_DEVICE (1UL << 4) +#define MIGRATE_PFN_ERROR (1UL << 5) +#define MIGRATE_PFN_SHIFT 6 + +static inline struct page *migrate_pfn_to_page(unsigned long mpfn) +{ + if (!(mpfn & MIGRATE_PFN_VALID)) + return NULL; + return pfn_to_page(mpfn >> MIGRATE_PFN_SHIFT); +} + +static inline unsigned long migrate_pfn(unsigned long pfn) +{ + return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID; +} + +/* + * struct migrate_vma_ops - migrate operation callback + * + * @alloc_and_copy: alloc destination memory and copy source memory to it + * @finalize_and_map: allow caller to map the successfully migrated pages + * + * + * The alloc_and_copy() callback happens once all source pages have been locked, + * unmapped and checked (checked whether pinned or not). All pages that can be + * migrated will have an entry in the src array set with the pfn value of the + * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set (other + * flags might be set but should be ignored by the callback). + * + * The alloc_and_copy() callback can then allocate destination memory and copy + * source memory to it for all those entries (ie with MIGRATE_PFN_VALID and + * MIGRATE_PFN_MIGRATE flag set). Once these are allocated and copied, the + * callback must update each corresponding entry in the dst array with the pfn + * value of the destination page and with the MIGRATE_PFN_VALID and + * MIGRATE_PFN_LOCKED flags set (destination pages must have their struct pages + * locked, via lock_page()). + * + * At this point the alloc_and_copy() callback is done and returns. + * + * Note that the callback does not have to migrate all the pages that are + * marked with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration + * from device memory to system memory (ie the MIGRATE_PFN_DEVICE flag is also + * set in the src array entry). If the device driver cannot migrate a device + * page back to system memory, then it must set the corresponding dst array + * entry to MIGRATE_PFN_ERROR. This will trigger a SIGBUS if CPU tries to + * access any of the virtual addresses originally backed by this page. Because + * a SIGBUS is such a severe result for the userspace process, the device + * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an + * unrecoverable state. + * + * For empty entry inside CPU page table (pte_none() or pmd_none() is true) we + * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus + * allowing device driver to allocate device memory for those unback virtual + * address. For this the device driver simply have to allocate device memory + * and properly set the destination entry like for regular migration. Note that + * this can still fails and thus inside the device driver must check if the + * migration was successful for those entry inside the finalize_and_map() + * callback just like for regular migration. + * + * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES + * OR BAD THINGS WILL HAPPEN ! + * + * + * The finalize_and_map() callback happens after struct page migration from + * source to destination (destination struct pages are the struct pages for the + * memory allocated by the alloc_and_copy() callback). Migration can fail, and + * thus the finalize_and_map() allows the driver to inspect which pages were + * successfully migrated, and which were not. Successfully migrated pages will + * have the MIGRATE_PFN_MIGRATE flag set for their src array entry. + * + * It is safe to update device page table from within the finalize_and_map() + * callback because both destination and source page are still locked, and the + * mmap_sem is held in read mode (hence no one can unmap the range being + * migrated). + * + * Once callback is done cleaning up things and updating its page table (if it + * chose to do so, this is not an obligation) then it returns. At this point, + * the HMM core will finish up the final steps, and the migration is complete. + * + * THE finalize_and_map() CALLBACK MUST NOT CHANGE ANY OF THE SRC OR DST ARRAY + * ENTRIES OR BAD THINGS WILL HAPPEN ! + */ +struct migrate_vma_ops { + void (*alloc_and_copy)(struct vm_area_struct *vma, + const unsigned long *src, + unsigned long *dst, + unsigned long start, + unsigned long end, + void *private); + void (*finalize_and_map)(struct vm_area_struct *vma, + const unsigned long *src, + const unsigned long *dst, + unsigned long start, + unsigned long end, + void *private); +}; + +#if defined(CONFIG_MIGRATE_VMA_HELPER) +int migrate_vma(const struct migrate_vma_ops *ops, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long *src, + unsigned long *dst, + void *private); +#else +static inline int migrate_vma(const struct migrate_vma_ops *ops, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long *src, + unsigned long *dst, + void *private) +{ + return -EINVAL; +} +#endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */ + +#endif /* CONFIG_MIGRATION */ + #endif /* _LINUX_MIGRATE_H */ diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h index ebf3d89a3919405ccd9a887178c65764a931a291..bdf66af9b937a51a661a15c7f0feda67847efe90 100644 --- a/include/linux/migrate_mode.h +++ b/include/linux/migrate_mode.h @@ -6,11 +6,16 @@ * on most operations but not ->writepage as the potential stall time * is too significant * MIGRATE_SYNC will block when migrating pages + * MIGRATE_SYNC_NO_COPY will block when migrating pages but will not copy pages + * with the CPU. Instead, page copy happens outside the migratepage() + * callback and is likely using a DMA engine. See migrate_vma() and HMM + * (mm/hmm.c) for users of this mode. */ enum migrate_mode { MIGRATE_ASYNC, MIGRATE_SYNC_LIGHT, MIGRATE_SYNC, + MIGRATE_SYNC_NO_COPY, }; #endif /* MIGRATE_MODE_H_INCLUDED */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 39db8e54c5d50a98ee9cf49eb5cf16e5271095a4..f8c10d336e42ea5b43e6f72e593eaf90027a6cc5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -23,6 +23,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -799,6 +800,28 @@ static inline bool is_zone_device_page(const struct page *page) } #endif +#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) +void put_zone_device_private_or_public_page(struct page *page); +DECLARE_STATIC_KEY_FALSE(device_private_key); +#define IS_HMM_ENABLED static_branch_unlikely(&device_private_key) +static inline bool is_device_private_page(const struct page *page); +static inline bool is_device_public_page(const struct page *page); +#else /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ +static inline void put_zone_device_private_or_public_page(struct page *page) +{ +} +#define IS_HMM_ENABLED 0 +static inline bool is_device_private_page(const struct page *page) +{ + return false; +} +static inline bool is_device_public_page(const struct page *page) +{ + return false; +} +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ + + static inline void get_page(struct page *page) { page = compound_head(page); @@ -814,6 +837,18 @@ static inline void put_page(struct page *page) { page = compound_head(page); + /* + * For private device pages we need to catch refcount transition from + * 2 to 1, when refcount reach one it means the private device page is + * free and we need to inform the device driver through callback. See + * include/linux/memremap.h and HMM for details. + */ + if (IS_HMM_ENABLED && unlikely(is_device_private_page(page) || + unlikely(is_device_public_page(page)))) { + put_zone_device_private_or_public_page(page); + return; + } + if (put_page_testzero(page)) __put_page(page); } @@ -1199,8 +1234,10 @@ struct zap_details { pgoff_t last_index; /* Highest page->index to unmap */ }; -struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, - pte_t pte); +struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, + pte_t pte, bool with_public_device); +#define vm_normal_page(vma, addr, pte) _vm_normal_page(vma, addr, pte, false) + struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t pmd); @@ -1997,13 +2034,13 @@ extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); /* interval_tree.c */ void vma_interval_tree_insert(struct vm_area_struct *node, - struct rb_root *root); + struct rb_root_cached *root); void vma_interval_tree_insert_after(struct vm_area_struct *node, struct vm_area_struct *prev, - struct rb_root *root); + struct rb_root_cached *root); void vma_interval_tree_remove(struct vm_area_struct *node, - struct rb_root *root); -struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root, + struct rb_root_cached *root); +struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root_cached *root, unsigned long start, unsigned long last); struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, unsigned long start, unsigned long last); @@ -2013,11 +2050,12 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node, vma; vma = vma_interval_tree_iter_next(vma, start, last)) void anon_vma_interval_tree_insert(struct anon_vma_chain *node, - struct rb_root *root); + struct rb_root_cached *root); void anon_vma_interval_tree_remove(struct anon_vma_chain *node, - struct rb_root *root); -struct anon_vma_chain *anon_vma_interval_tree_iter_first( - struct rb_root *root, unsigned long start, unsigned long last); + struct rb_root_cached *root); +struct anon_vma_chain * +anon_vma_interval_tree_iter_first(struct rb_root_cached *root, + unsigned long start, unsigned long last); struct anon_vma_chain *anon_vma_interval_tree_iter_next( struct anon_vma_chain *node, unsigned long start, unsigned long last); #ifdef CONFIG_DEBUG_VM_RB diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f45ad815b7d7910a56c888261710856ef96d5734..46f4ecf5479adbb2829c26e63d5e56b20e779312 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -23,6 +23,7 @@ struct address_space; struct mem_cgroup; +struct hmm; /* * Each physical page in the system has a struct page associated with @@ -503,6 +504,11 @@ struct mm_struct { atomic_long_t hugetlb_usage; #endif struct work_struct async_put_work; + +#if IS_ENABLED(CONFIG_HMM) + /* HMM needs to track a few things per mm */ + struct hmm *hmm; +#endif } __randomize_layout; extern struct mm_struct init_mm; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e7e92c8f4883740b9fea3f636d31d583cb03ee71..356a814e7c8eb8b56179dd72bdbcf76703fa56b1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -114,6 +114,20 @@ struct zone_padding { #define ZONE_PADDING(name) #endif +#ifdef CONFIG_NUMA +enum numa_stat_item { + NUMA_HIT, /* allocated in intended node */ + NUMA_MISS, /* allocated in non intended node */ + NUMA_FOREIGN, /* was intended here, hit elsewhere */ + NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ + NUMA_LOCAL, /* allocation from local node */ + NUMA_OTHER, /* allocation from other node */ + NR_VM_NUMA_STAT_ITEMS +}; +#else +#define NR_VM_NUMA_STAT_ITEMS 0 +#endif + enum zone_stat_item { /* First 128 byte cacheline (assuming 64 bit words) */ NR_FREE_PAGES, @@ -131,14 +145,6 @@ enum zone_stat_item { NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ -#endif -#ifdef CONFIG_NUMA - NUMA_HIT, /* allocated in intended node */ - NUMA_MISS, /* allocated in non intended node */ - NUMA_FOREIGN, /* was intended here, hit elsewhere */ - NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */ - NUMA_LOCAL, /* allocation from local node */ - NUMA_OTHER, /* allocation from other node */ #endif NR_FREE_CMA_PAGES, NR_VM_ZONE_STAT_ITEMS }; @@ -276,6 +282,7 @@ struct per_cpu_pageset { struct per_cpu_pages pcp; #ifdef CONFIG_NUMA s8 expire; + u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS]; #endif #ifdef CONFIG_SMP s8 stat_threshold; @@ -496,6 +503,7 @@ struct zone { ZONE_PADDING(_pad3_) /* Zone statistics */ atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; + atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; } ____cacheline_internodealigned_in_smp; enum pgdat_flags { diff --git a/include/linux/pps-gpio.h b/include/linux/pps-gpio.h index 0035abe41b9a4547f70a65c94f2477b11c320f30..56f35dd3d01d382820c18283500f4a6f725c854f 100644 --- a/include/linux/pps-gpio.h +++ b/include/linux/pps-gpio.h @@ -29,4 +29,4 @@ struct pps_gpio_platform_data { const char *gpio_label; }; -#endif +#endif /* _PPS_GPIO_H */ diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 35ac903956c7c0c282a9b0cfd74004212c8eb10d..80a980cc8d95f174a120fbd1aece4bdf7e55315b 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -22,7 +22,6 @@ #define LINUX_PPS_KERNEL_H #include - #include #include #include @@ -35,9 +34,9 @@ struct pps_device; /* The specific PPS source info */ struct pps_source_info { - char name[PPS_MAX_NAME_LEN]; /* simbolic name */ + char name[PPS_MAX_NAME_LEN]; /* symbolic name */ char path[PPS_MAX_NAME_LEN]; /* path of connected device */ - int mode; /* PPS's allowed mode */ + int mode; /* PPS allowed mode */ void (*echo)(struct pps_device *pps, int event, void *data); /* PPS echo function */ @@ -57,10 +56,10 @@ struct pps_event_time { struct pps_device { struct pps_source_info info; /* PSS source info */ - struct pps_kparams params; /* PPS's current params */ + struct pps_kparams params; /* PPS current params */ - __u32 assert_sequence; /* PPS' assert event seq # */ - __u32 clear_sequence; /* PPS' clear event seq # */ + __u32 assert_sequence; /* PPS assert event seq # */ + __u32 clear_sequence; /* PPS clear event seq # */ struct pps_ktime assert_tu; struct pps_ktime clear_tu; int current_mode; /* PPS mode at event time */ @@ -69,7 +68,7 @@ struct pps_device { wait_queue_head_t queue; /* PPS event queue */ unsigned int id; /* PPS source unique ID */ - void const *lookup_cookie; /* pps_lookup_dev only */ + void const *lookup_cookie; /* For pps_lookup_dev() only */ struct cdev cdev; struct device *dev; struct fasync_struct *async_queue; /* fasync method */ @@ -101,7 +100,7 @@ extern struct pps_device *pps_register_source( extern void pps_unregister_source(struct pps_device *pps); extern void pps_event(struct pps_device *pps, struct pps_event_time *ts, int event, void *data); -/* Look up a pps device by magic cookie */ +/* Look up a pps_device by magic cookie */ struct pps_device *pps_lookup_dev(void const *cookie); static inline void timespec_to_pps_ktime(struct pps_ktime *kt, @@ -132,4 +131,3 @@ static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta } #endif /* LINUX_PPS_KERNEL_H */ - diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 2d2bf592d9db216f31093cdc01c0ef418d663af9..76124dd4e36d64e87d3bf9f816b6979a8c35e5c4 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -28,13 +28,7 @@ extern struct proc_dir_entry *proc_create_data(const char *, umode_t, const struct file_operations *, void *); -static inline struct proc_dir_entry *proc_create( - const char *name, umode_t mode, struct proc_dir_entry *parent, - const struct file_operations *proc_fops) -{ - return proc_create_data(name, mode, parent, proc_fops, NULL); -} - +struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct file_operations *proc_fops); extern void proc_set_size(struct proc_dir_entry *, loff_t); extern void proc_set_user(struct proc_dir_entry *, kuid_t, kgid_t); extern void *PDE_DATA(const struct inode *); diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index e585018498d594f7c12abe6d3935f321009b3cd8..d574361943ea832532cf93177e97a8c62d6c3430 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -44,10 +44,25 @@ struct rb_root { struct rb_node *rb_node; }; +/* + * Leftmost-cached rbtrees. + * + * We do not cache the rightmost node based on footprint + * size vs number of potential users that could benefit + * from O(1) rb_last(). Just not worth it, users that want + * this feature can always implement the logic explicitly. + * Furthermore, users that want to cache both pointers may + * find it a bit asymmetric, but that's ok. + */ +struct rb_root_cached { + struct rb_root rb_root; + struct rb_node *rb_leftmost; +}; #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) #define RB_ROOT (struct rb_root) { NULL, } +#define RB_ROOT_CACHED (struct rb_root_cached) { {NULL, }, NULL } #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) @@ -69,6 +84,12 @@ extern struct rb_node *rb_prev(const struct rb_node *); extern struct rb_node *rb_first(const struct rb_root *); extern struct rb_node *rb_last(const struct rb_root *); +extern void rb_insert_color_cached(struct rb_node *, + struct rb_root_cached *, bool); +extern void rb_erase_cached(struct rb_node *node, struct rb_root_cached *); +/* Same as rb_first(), but O(1) */ +#define rb_first_cached(root) (root)->rb_leftmost + /* Postorder iteration - always visit the parent after its children */ extern struct rb_node *rb_first_postorder(const struct rb_root *); extern struct rb_node *rb_next_postorder(const struct rb_node *); diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 9702b6e183bc6ab2432573170e1b43db4ff19288..6bfd2b581f75a464e13d3d749ebe5d639a32551f 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -41,7 +41,9 @@ struct rb_augment_callbacks { void (*rotate)(struct rb_node *old, struct rb_node *new); }; -extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, +extern void __rb_insert_augmented(struct rb_node *node, + struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); /* * Fixup the rbtree and update the augmented information when rebalancing. @@ -57,7 +59,16 @@ static inline void rb_insert_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - __rb_insert_augmented(node, root, augment->rotate); + __rb_insert_augmented(node, root, false, NULL, augment->rotate); +} + +static inline void +rb_insert_augmented_cached(struct rb_node *node, + struct rb_root_cached *root, bool newleft, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, &root->rb_root, + newleft, &root->rb_leftmost, augment->rotate); } #define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ @@ -150,6 +161,7 @@ extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, static __always_inline struct rb_node * __rb_erase_augmented(struct rb_node *node, struct rb_root *root, + struct rb_node **leftmost, const struct rb_augment_callbacks *augment) { struct rb_node *child = node->rb_right; @@ -157,6 +169,9 @@ __rb_erase_augmented(struct rb_node *node, struct rb_root *root, struct rb_node *parent, *rebalance; unsigned long pc; + if (leftmost && node == *leftmost) + *leftmost = rb_next(node); + if (!tmp) { /* * Case 1: node to erase has no more than 1 child (easy!) @@ -256,9 +271,21 @@ static __always_inline void rb_erase_augmented(struct rb_node *node, struct rb_root *root, const struct rb_augment_callbacks *augment) { - struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + struct rb_node *rebalance = __rb_erase_augmented(node, root, + NULL, augment); if (rebalance) __rb_erase_color(rebalance, root, augment->rotate); } +static __always_inline void +rb_erase_augmented_cached(struct rb_node *node, struct rb_root_cached *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, &root->rb_root, + &root->rb_leftmost, + augment); + if (rebalance) + __rb_erase_color(rebalance, &root->rb_root, augment->rotate); +} + #endif /* _LINUX_RBTREE_AUGMENTED_H */ diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 7d56a7ea2b2e8cdcf471da6aaa6ac0be3c51acaa..361c08e35dbc388f0acd2891706d4df8f33026b6 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -127,7 +127,7 @@ struct rhashtable; * @head_offset: Offset of rhash_head in struct to be hashed * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking - * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) + * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) * @automatic_shrinking: Enable automatic shrinking of tables * @nulls_base: Base value to generate nulls marker * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 43ef2c30cb0f59f83c05d23eea4a8aad87edefd1..733d3d8181e22fc2a7effe78add0f22acc3c147c 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -55,7 +55,9 @@ struct anon_vma { * is serialized by a system wide lock only visible to * mm_take_all_locks() (mm_all_locks_mutex). */ - struct rb_root rb_root; /* Interval tree of private "related" vmas */ + + /* Interval tree of private "related" vmas */ + struct rb_root_cached rb_root; }; /* @@ -93,8 +95,9 @@ enum ttu_flags { TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ - TTU_RMAP_LOCKED = 0x80 /* do not grab rmap lock: + TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock: * caller holds it */ + TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */ }; #ifdef CONFIG_MMU diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index 44fd002f7cd54472176c9a920ce43645c8ff641c..53fcbe9de7fd11bd18b9cdd8ce406588f3297410 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -22,18 +22,17 @@ extern int max_lock_depth; /* for sysctl */ * The rt_mutex structure * * @wait_lock: spinlock to protect the structure - * @waiters: rbtree root to enqueue waiters in priority order - * @waiters_leftmost: top waiter + * @waiters: rbtree root to enqueue waiters in priority order; + * caches top-waiter (leftmost node). * @owner: the mutex owner */ struct rt_mutex { raw_spinlock_t wait_lock; - struct rb_root waiters; - struct rb_node *waiters_leftmost; + struct rb_root_cached waiters; struct task_struct *owner; #ifdef CONFIG_DEBUG_RT_MUTEXES int save_state; - const char *name, *file; + const char *name, *file; int line; void *magic; #endif @@ -84,7 +83,7 @@ do { \ #define __RT_MUTEX_INITIALIZER(mutexname) \ { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ - , .waiters = RB_ROOT \ + , .waiters = RB_ROOT_CACHED \ , .owner = NULL \ __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} diff --git a/include/linux/sched.h b/include/linux/sched.h index 68b38335d33cb9365dd1febeca54ec27b9d2ff04..92fb8dd5a9e4884bfde2225bb0c6d933ed13c660 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -812,8 +812,7 @@ struct task_struct { #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task: */ - struct rb_root pi_waiters; - struct rb_node *pi_waiters_leftmost; + struct rb_root_cached pi_waiters; /* Updated under owner's pi_lock and rq lock */ struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling: */ diff --git a/include/linux/string.h b/include/linux/string.h index a467e617eeb08cde480b7e5e2b65ffa208d2243c..c8bdafffd2f02f7e0d5533ae06603024f01704da 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -99,6 +99,36 @@ extern __kernel_size_t strcspn(const char *,const char *); #ifndef __HAVE_ARCH_MEMSET extern void * memset(void *,int,__kernel_size_t); #endif + +#ifndef __HAVE_ARCH_MEMSET16 +extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); +#endif + +#ifndef __HAVE_ARCH_MEMSET32 +extern void *memset32(uint32_t *, uint32_t, __kernel_size_t); +#endif + +#ifndef __HAVE_ARCH_MEMSET64 +extern void *memset64(uint64_t *, uint64_t, __kernel_size_t); +#endif + +static inline void *memset_l(unsigned long *p, unsigned long v, + __kernel_size_t n) +{ + if (BITS_PER_LONG == 32) + return memset32((uint32_t *)p, v, n); + else + return memset64((uint64_t *)p, v, n); +} + +static inline void *memset_p(void **p, void *v, __kernel_size_t n) +{ + if (BITS_PER_LONG == 32) + return memset32((uint32_t *)p, (uintptr_t)v, n); + else + return memset64((uint64_t *)p, (uintptr_t)v, n); +} + #ifndef __HAVE_ARCH_MEMCPY extern void * memcpy(void *,const void *,__kernel_size_t); #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index 8bf3487fb2046b1a03e4d28fca762c35281cc219..8a807292037f9ff956980260ba61c0c4c22dd07f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -50,6 +50,23 @@ static inline int current_is_kswapd(void) * actions on faults. */ +/* + * Unaddressable device memory support. See include/linux/hmm.h and + * Documentation/vm/hmm.txt. Short description is we need struct pages for + * device memory that is unaddressable (inaccessible) by CPU, so that we can + * migrate part of a process memory to device memory. + * + * When a page is migrated from CPU to device, we set the CPU page table entry + * to a special SWP_DEVICE_* entry. + */ +#ifdef CONFIG_DEVICE_PRIVATE +#define SWP_DEVICE_NUM 2 +#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM) +#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1) +#else +#define SWP_DEVICE_NUM 0 +#endif + /* * NUMA node memory migration support */ @@ -72,7 +89,8 @@ static inline int current_is_kswapd(void) #endif #define MAX_SWAPFILES \ - ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) + ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \ + SWP_MIGRATION_NUM - SWP_HWPOISON_NUM) /* * Magic header for a swap area. The first part of the union is @@ -469,8 +487,8 @@ static inline void show_swap_cache_info(void) { } -#define free_swap_and_cache(swp) is_migration_entry(swp) -#define swapcache_prepare(swp) is_migration_entry(swp) +#define free_swap_and_cache(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) +#define swapcache_prepare(e) ({(is_migration_entry(e) || is_device_private_entry(e));}) static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask) { diff --git a/include/linux/swapops.h b/include/linux/swapops.h index c5ff7b217ee6e1ba8d011e301e742fdce9bb0a32..291c4b53465819293aaa9c25bbed1b932f449f1f 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -100,10 +100,79 @@ static inline void *swp_to_radix_entry(swp_entry_t entry) return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY); } +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) +static inline swp_entry_t make_device_private_entry(struct page *page, bool write) +{ + return swp_entry(write ? SWP_DEVICE_WRITE : SWP_DEVICE_READ, + page_to_pfn(page)); +} + +static inline bool is_device_private_entry(swp_entry_t entry) +{ + int type = swp_type(entry); + return type == SWP_DEVICE_READ || type == SWP_DEVICE_WRITE; +} + +static inline void make_device_private_entry_read(swp_entry_t *entry) +{ + *entry = swp_entry(SWP_DEVICE_READ, swp_offset(*entry)); +} + +static inline bool is_write_device_private_entry(swp_entry_t entry) +{ + return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); +} + +static inline struct page *device_private_entry_to_page(swp_entry_t entry) +{ + return pfn_to_page(swp_offset(entry)); +} + +int device_private_entry_fault(struct vm_area_struct *vma, + unsigned long addr, + swp_entry_t entry, + unsigned int flags, + pmd_t *pmdp); +#else /* CONFIG_DEVICE_PRIVATE */ +static inline swp_entry_t make_device_private_entry(struct page *page, bool write) +{ + return swp_entry(0, 0); +} + +static inline void make_device_private_entry_read(swp_entry_t *entry) +{ +} + +static inline bool is_device_private_entry(swp_entry_t entry) +{ + return false; +} + +static inline bool is_write_device_private_entry(swp_entry_t entry) +{ + return false; +} + +static inline struct page *device_private_entry_to_page(swp_entry_t entry) +{ + return NULL; +} + +static inline int device_private_entry_fault(struct vm_area_struct *vma, + unsigned long addr, + swp_entry_t entry, + unsigned int flags, + pmd_t *pmdp) +{ + return VM_FAULT_SIGBUS; +} +#endif /* CONFIG_DEVICE_PRIVATE */ + #ifdef CONFIG_MIGRATION static inline swp_entry_t make_migration_entry(struct page *page, int write) { - BUG_ON(!PageLocked(page)); + BUG_ON(!PageLocked(compound_head(page))); + return swp_entry(write ? SWP_MIGRATION_WRITE : SWP_MIGRATION_READ, page_to_pfn(page)); } @@ -126,7 +195,7 @@ static inline struct page *migration_entry_to_page(swp_entry_t entry) * Any use of migration entries may only occur while the * corresponding page is locked */ - BUG_ON(!PageLocked(p)); + BUG_ON(!PageLocked(compound_head(p))); return p; } @@ -148,7 +217,11 @@ static inline int is_migration_entry(swp_entry_t swp) { return 0; } -#define migration_entry_to_page(swp) NULL +static inline struct page *migration_entry_to_page(swp_entry_t entry) +{ + return NULL; +} + static inline void make_migration_entry_read(swp_entry_t *entryp) { } static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl) { } @@ -163,6 +236,70 @@ static inline int is_write_migration_entry(swp_entry_t entry) #endif +struct page_vma_mapped_walk; + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +extern void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, + struct page *page); + +extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, + struct page *new); + +extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd); + +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) +{ + swp_entry_t arch_entry; + + if (pmd_swp_soft_dirty(pmd)) + pmd = pmd_swp_clear_soft_dirty(pmd); + arch_entry = __pmd_to_swp_entry(pmd); + return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); +} + +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) +{ + swp_entry_t arch_entry; + + arch_entry = __swp_entry(swp_type(entry), swp_offset(entry)); + return __swp_entry_to_pmd(arch_entry); +} + +static inline int is_pmd_migration_entry(pmd_t pmd) +{ + return !pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)); +} +#else +static inline void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, + struct page *page) +{ + BUILD_BUG(); +} + +static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, + struct page *new) +{ + BUILD_BUG(); +} + +static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { } + +static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) +{ + return swp_entry(0, 0); +} + +static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) +{ + return __pmd(0); +} + +static inline int is_pmd_migration_entry(pmd_t pmd) +{ + return 0; +} +#endif + #ifdef CONFIG_MEMORY_FAILURE extern atomic_long_t num_poisoned_pages __read_mostly; diff --git a/include/linux/umh.h b/include/linux/umh.h new file mode 100644 index 0000000000000000000000000000000000000000..244aff6382208824e64c6387a38bb60e967f0548 --- /dev/null +++ b/include/linux/umh.h @@ -0,0 +1,69 @@ +#ifndef __LINUX_UMH_H__ +#define __LINUX_UMH_H__ + +#include +#include +#include +#include +#include +#include + +struct cred; +struct file; + +#define UMH_NO_WAIT 0 /* don't wait at all */ +#define UMH_WAIT_EXEC 1 /* wait for the exec, but not the process */ +#define UMH_WAIT_PROC 2 /* wait for the process to complete */ +#define UMH_KILLABLE 4 /* wait for EXEC/PROC killable */ + +struct subprocess_info { + struct work_struct work; + struct completion *complete; + const char *path; + char **argv; + char **envp; + int wait; + int retval; + int (*init)(struct subprocess_info *info, struct cred *new); + void (*cleanup)(struct subprocess_info *info); + void *data; +} __randomize_layout; + +extern int +call_usermodehelper(const char *path, char **argv, char **envp, int wait); + +extern struct subprocess_info * +call_usermodehelper_setup(const char *path, char **argv, char **envp, + gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *), void *data); + +extern int +call_usermodehelper_exec(struct subprocess_info *info, int wait); + +extern struct ctl_table usermodehelper_table[]; + +enum umh_disable_depth { + UMH_ENABLED = 0, + UMH_FREEZING, + UMH_DISABLED, +}; + +extern int __usermodehelper_disable(enum umh_disable_depth depth); +extern void __usermodehelper_set_disable_depth(enum umh_disable_depth depth); + +static inline int usermodehelper_disable(void) +{ + return __usermodehelper_disable(UMH_DISABLED); +} + +static inline void usermodehelper_enable(void) +{ + __usermodehelper_set_disable_depth(UMH_ENABLED); +} + +extern int usermodehelper_read_trylock(void); +extern long usermodehelper_read_lock_wait(long timeout); +extern void usermodehelper_read_unlock(void); + +#endif /* __LINUX_UMH_H__ */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 97e11ab573f0812d7862b03d5650cd482419a399..ade7cb5f1359915f0d5036ae7884d99adc20e021 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -107,8 +107,37 @@ static inline void vm_events_fold_cpu(int cpu) * Zone and node-based page accounting with per cpu differentials. */ extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS]; +extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS]; extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS]; +#ifdef CONFIG_NUMA +static inline void zone_numa_state_add(long x, struct zone *zone, + enum numa_stat_item item) +{ + atomic_long_add(x, &zone->vm_numa_stat[item]); + atomic_long_add(x, &vm_numa_stat[item]); +} + +static inline unsigned long global_numa_state(enum numa_stat_item item) +{ + long x = atomic_long_read(&vm_numa_stat[item]); + + return x; +} + +static inline unsigned long zone_numa_state_snapshot(struct zone *zone, + enum numa_stat_item item) +{ + long x = atomic_long_read(&zone->vm_numa_stat[item]); + int cpu; + + for_each_online_cpu(cpu) + x += per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item]; + + return x; +} +#endif /* CONFIG_NUMA */ + static inline void zone_page_state_add(long x, struct zone *zone, enum zone_stat_item item) { @@ -194,8 +223,10 @@ static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat, #ifdef CONFIG_NUMA +extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item); extern unsigned long sum_zone_node_page_state(int node, - enum zone_stat_item item); + enum zone_stat_item item); +extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item); extern unsigned long node_page_state(struct pglist_data *pgdat, enum node_stat_item item); #else diff --git a/include/linux/vt_buffer.h b/include/linux/vt_buffer.h index f38c10ba3ff5ea01821ed375d25be2a403365e94..30b6e0d2a942600e234c8195551a008e4d23c039 100644 --- a/include/linux/vt_buffer.h +++ b/include/linux/vt_buffer.h @@ -13,6 +13,7 @@ #ifndef _LINUX_VT_BUFFER_H_ #define _LINUX_VT_BUFFER_H_ +#include #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_MDA_CONSOLE) #include @@ -26,24 +27,33 @@ #ifndef VT_BUF_HAVE_MEMSETW static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) { +#ifdef VT_BUF_HAVE_RW count /= 2; while (count--) scr_writew(c, s++); +#else + memset16(s, c, count / 2); +#endif } #endif #ifndef VT_BUF_HAVE_MEMCPYW static inline void scr_memcpyw(u16 *d, const u16 *s, unsigned int count) { +#ifdef VT_BUF_HAVE_RW count /= 2; while (count--) scr_writew(scr_readw(s++), d++); +#else + memcpy(d, s, count); +#endif } #endif #ifndef VT_BUF_HAVE_MEMMOVEW static inline void scr_memmovew(u16 *d, const u16 *s, unsigned int count) { +#ifdef VT_BUF_HAVE_RW if (d < s) scr_memcpyw(d, s, count); else { @@ -53,6 +63,9 @@ static inline void scr_memmovew(u16 *d, const u16 *s, unsigned int count) while (count--) scr_writew(scr_readw(--s), --d); } +#else + memmove(d, s, count); +#endif } #endif diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index fb67554aabd6e0f80c34f8966b035cbb4efba029..5eb7f5bc82485458ceccb4540597433b7a15a6fb 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -111,22 +111,25 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt, void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bound); -void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root); -void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root); +void rbt_ib_umem_insert(struct umem_odp_node *node, + struct rb_root_cached *root); +void rbt_ib_umem_remove(struct umem_odp_node *node, + struct rb_root_cached *root); typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end, void *cookie); /* * Call the callback on each ib_umem in the range. Returns the logical or of * the return values of the functions called. */ -int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end, +int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root, + u64 start, u64 end, umem_call_back cb, void *cookie); /* * Find first region intersecting with address range. * Return NULL if not found */ -struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root, +struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length); static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e6df6804851734c0bbb5fee1ec3c773fe65f1110..bdb1279a415b39f0597a02a6eb36aa6bd5e7ded5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1457,7 +1457,7 @@ struct ib_ucontext { struct pid *tgid; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - struct rb_root umem_tree; + struct rb_root_cached umem_tree; /* * Protects .umem_rbroot and tree, as well as odp_mrs_count and * mmu notifiers registration. diff --git a/include/uapi/linux/auto_dev-ioctl.h b/include/uapi/linux/auto_dev-ioctl.h index 744b3d0609687b82bb198ed76c158417c22f09f9..5558db8e6646bc4abb46471b129331e8d1862b90 100644 --- a/include/uapi/linux/auto_dev-ioctl.h +++ b/include/uapi/linux/auto_dev-ioctl.h @@ -16,7 +16,7 @@ #define AUTOFS_DEVICE_NAME "autofs" #define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1 -#define AUTOFS_DEV_IOCTL_VERSION_MINOR 0 +#define AUTOFS_DEV_IOCTL_VERSION_MINOR 1 #define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) diff --git a/include/uapi/linux/auto_fs4.h b/include/uapi/linux/auto_fs4.h index 7c6da423d54ee6f3298cdf479d91a900cb5b2c4c..9453e9a07c9d124fd31baf1868473fbb8fe218c2 100644 --- a/include/uapi/linux/auto_fs4.h +++ b/include/uapi/linux/auto_fs4.h @@ -155,8 +155,6 @@ enum { }; #define AUTOFS_IOC_EXPIRE_MULTI _IOW(AUTOFS_IOCTL, AUTOFS_IOC_EXPIRE_MULTI_CMD, int) -#define AUTOFS_IOC_EXPIRE_INDIRECT AUTOFS_IOC_EXPIRE_MULTI -#define AUTOFS_IOC_EXPIRE_DIRECT AUTOFS_IOC_EXPIRE_MULTI #define AUTOFS_IOC_PROTOSUBVER _IOR(AUTOFS_IOCTL, AUTOFS_IOC_PROTOSUBVER_CMD, int) #define AUTOFS_IOC_ASKUMOUNT _IOR(AUTOFS_IOCTL, AUTOFS_IOC_ASKUMOUNT_CMD, int) diff --git a/include/uapi/linux/pps.h b/include/uapi/linux/pps.h index c1cb3825a8bc805e804f88c4e759e77716c6efef..c29d6b791c0816e35b0e025b80ed8d9f0eb0400a 100644 --- a/include/uapi/linux/pps.h +++ b/include/uapi/linux/pps.h @@ -95,8 +95,8 @@ struct pps_kparams { #define PPS_CAPTURECLEAR 0x02 /* capture clear events */ #define PPS_CAPTUREBOTH 0x03 /* capture assert and clear events */ -#define PPS_OFFSETASSERT 0x10 /* apply compensation for assert ev. */ -#define PPS_OFFSETCLEAR 0x20 /* apply compensation for clear ev. */ +#define PPS_OFFSETASSERT 0x10 /* apply compensation for assert event */ +#define PPS_OFFSETCLEAR 0x20 /* apply compensation for clear event */ #define PPS_CANWAIT 0x100 /* can we wait for an event? */ #define PPS_CANPOLL 0x200 /* bit reserved for future use */ diff --git a/init/main.c b/init/main.c index 949306bb5b6ad8e8930ecd11ccac910466674081..0ee9c6866ada1d9877c6afd50e23237eab6dfb4c 100644 --- a/init/main.c +++ b/init/main.c @@ -515,12 +515,6 @@ asmlinkage __visible void __init start_kernel(void) smp_setup_processor_id(); debug_objects_early_init(); - /* - * Set up the initial canary ASAP: - */ - add_latent_entropy(); - boot_init_stack_canary(); - cgroup_init_early(); local_irq_disable(); @@ -534,6 +528,13 @@ asmlinkage __visible void __init start_kernel(void) page_address_init(); pr_notice("%s", linux_banner); setup_arch(&command_line); + /* + * Set up the the initial canary and entropy after arch + * and after adding latent and command line entropy. + */ + add_latent_entropy(); + add_device_randomness(command_line, strlen(command_line)); + boot_init_stack_canary(); mm_init_cpumask(&init_mm); setup_command_line(command_line); setup_nr_cpu_ids(); diff --git a/ipc/msg.c b/ipc/msg.c index 2c38f10d148305857a6388764b0ad1501f227bce..df82bc9a5531632da7763dbf5a1f08712788c44b 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -1011,7 +1011,7 @@ SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, } -void msg_init_ns(struct ipc_namespace *ns) +int msg_init_ns(struct ipc_namespace *ns) { ns->msg_ctlmax = MSGMAX; ns->msg_ctlmnb = MSGMNB; @@ -1019,7 +1019,7 @@ void msg_init_ns(struct ipc_namespace *ns) atomic_set(&ns->msg_bytes, 0); atomic_set(&ns->msg_hdrs, 0); - ipc_init_ids(&ns->ids[IPC_MSG_IDS]); + return ipc_init_ids(&ns->ids[IPC_MSG_IDS]); } #ifdef CONFIG_IPC_NS @@ -1027,6 +1027,7 @@ void msg_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &msg_ids(ns), freeque); idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr); + rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht); } #endif @@ -1058,11 +1059,12 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it) } #endif -void __init msg_init(void) +int __init msg_init(void) { - msg_init_ns(&init_ipc_ns); + const int err = msg_init_ns(&init_ipc_ns); ipc_init_proc_interface("sysvipc/msg", " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", IPC_MSG_IDS, sysvipc_msg_proc_show); + return err; } diff --git a/ipc/msgutil.c b/ipc/msgutil.c index bf74eaa5c39f208ea5f1ee41db38e57349512e7d..84598025a6ade1faaf412d3ae67e5c2bda32e294 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -29,7 +29,7 @@ DEFINE_SPINLOCK(mq_lock); * and not CONFIG_IPC_NS. */ struct ipc_namespace init_ipc_ns = { - .count = ATOMIC_INIT(1), + .count = REFCOUNT_INIT(1), .user_ns = &init_user_ns, .ns.inum = PROC_IPC_INIT_INO, #ifdef CONFIG_IPC_NS diff --git a/ipc/namespace.c b/ipc/namespace.c index b4d80f9f7246732e7ca749db7a1f2e33e105531a..fc850c526698428fad7e7538e3ba673486553358 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -50,20 +50,32 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, goto fail_free; ns->ns.ops = &ipcns_operations; - atomic_set(&ns->count, 1); + refcount_set(&ns->count, 1); ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; - err = mq_init_ns(ns); + err = sem_init_ns(ns); if (err) goto fail_put; + err = msg_init_ns(ns); + if (err) + goto fail_destroy_sem; + err = shm_init_ns(ns); + if (err) + goto fail_destroy_msg; - sem_init_ns(ns); - msg_init_ns(ns); - shm_init_ns(ns); + err = mq_init_ns(ns); + if (err) + goto fail_destroy_shm; return ns; +fail_destroy_shm: + shm_exit_ns(ns); +fail_destroy_msg: + msg_exit_ns(ns); +fail_destroy_sem: + sem_exit_ns(ns); fail_put: put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); @@ -144,7 +156,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) */ void put_ipc_ns(struct ipc_namespace *ns) { - if (atomic_dec_and_lock(&ns->count, &mq_lock)) { + if (refcount_dec_and_lock(&ns->count, &mq_lock)) { mq_clear_sbinfo(ns); spin_unlock(&mq_lock); mq_put_mnt(ns); diff --git a/ipc/sem.c b/ipc/sem.c index c6c50370504cc9b5a6e9dcd0b50193414caf88a2..013c7981f3c74ce96369a1ee5835270b4cad6924 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -122,7 +122,7 @@ struct sem_undo { * that may be shared among all a CLONE_SYSVSEM task group. */ struct sem_undo_list { - atomic_t refcnt; + refcount_t refcnt; spinlock_t lock; struct list_head list_proc; }; @@ -130,8 +130,6 @@ struct sem_undo_list { #define sem_ids(ns) ((ns)->ids[IPC_SEM_IDS]) -#define sem_checkid(sma, semid) ipc_checkid(&sma->sem_perm, semid) - static int newary(struct ipc_namespace *, struct ipc_params *); static void freeary(struct ipc_namespace *, struct kern_ipc_perm *); #ifdef CONFIG_PROC_FS @@ -185,14 +183,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); #define sc_semopm sem_ctls[2] #define sc_semmni sem_ctls[3] -void sem_init_ns(struct ipc_namespace *ns) +int sem_init_ns(struct ipc_namespace *ns) { ns->sc_semmsl = SEMMSL; ns->sc_semmns = SEMMNS; ns->sc_semopm = SEMOPM; ns->sc_semmni = SEMMNI; ns->used_sems = 0; - ipc_init_ids(&ns->ids[IPC_SEM_IDS]); + return ipc_init_ids(&ns->ids[IPC_SEM_IDS]); } #ifdef CONFIG_IPC_NS @@ -200,15 +198,18 @@ void sem_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &sem_ids(ns), freeary); idr_destroy(&ns->ids[IPC_SEM_IDS].ipcs_idr); + rhashtable_destroy(&ns->ids[IPC_SEM_IDS].key_ht); } #endif -void __init sem_init(void) +int __init sem_init(void) { - sem_init_ns(&init_ipc_ns); + const int err = sem_init_ns(&init_ipc_ns); + ipc_init_proc_interface("sysvipc/sem", " key semid perms nsems uid gid cuid cgid otime ctime\n", IPC_SEM_IDS, sysvipc_sem_proc_show); + return err; } /** @@ -1642,7 +1643,7 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) if (undo_list == NULL) return -ENOMEM; spin_lock_init(&undo_list->lock); - atomic_set(&undo_list->refcnt, 1); + refcount_set(&undo_list->refcnt, 1); INIT_LIST_HEAD(&undo_list->list_proc); current->sysvsem.undo_list = undo_list; @@ -1786,7 +1787,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, if (nsops > ns->sc_semopm) return -E2BIG; if (nsops > SEMOPM_FAST) { - sops = kmalloc(sizeof(*sops)*nsops, GFP_KERNEL); + sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL); if (sops == NULL) return -ENOMEM; } @@ -2018,7 +2019,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, rcu_read_unlock(); out_free: if (sops != fast_sops) - kfree(sops); + kvfree(sops); return error; } @@ -2041,7 +2042,7 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) error = get_undo_list(&undo_list); if (error) return error; - atomic_inc(&undo_list->refcnt); + refcount_inc(&undo_list->refcnt); tsk->sysvsem.undo_list = undo_list; } else tsk->sysvsem.undo_list = NULL; @@ -2070,7 +2071,7 @@ void exit_sem(struct task_struct *tsk) return; tsk->sysvsem.undo_list = NULL; - if (!atomic_dec_and_test(&ulp->refcnt)) + if (!refcount_dec_and_test(&ulp->refcnt)) return; for (;;) { diff --git a/ipc/shm.c b/ipc/shm.c index 8828b4c3a19041833f390d62f38084de2f64470b..8fc97beb52348a4c0fb140b7b3377d3c4fa38b2f 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -72,14 +72,14 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp); static int sysvipc_shm_proc_show(struct seq_file *s, void *it); #endif -void shm_init_ns(struct ipc_namespace *ns) +int shm_init_ns(struct ipc_namespace *ns) { ns->shm_ctlmax = SHMMAX; ns->shm_ctlall = SHMALL; ns->shm_ctlmni = SHMMNI; ns->shm_rmid_forced = 0; ns->shm_tot = 0; - ipc_init_ids(&shm_ids(ns)); + return ipc_init_ids(&shm_ids(ns)); } /* @@ -95,7 +95,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) if (shp->shm_nattch) { shp->shm_perm.mode |= SHM_DEST; /* Do not find it any more */ - shp->shm_perm.key = IPC_PRIVATE; + ipc_set_key_private(&shm_ids(ns), &shp->shm_perm); shm_unlock(shp); } else shm_destroy(ns, shp); @@ -106,13 +106,15 @@ void shm_exit_ns(struct ipc_namespace *ns) { free_ipcs(ns, &shm_ids(ns), do_shm_rmid); idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr); + rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht); } #endif static int __init ipc_ns_init(void) { - shm_init_ns(&init_ipc_ns); - return 0; + const int err = shm_init_ns(&init_ipc_ns); + WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err); + return err; } pure_initcall(ipc_ns_init); diff --git a/ipc/util.c b/ipc/util.c index 1a2cb02467ab5f33f309a4542620cbec8d18361a..78755873cc5b9e2dfe28f85e44933c282aa7d91e 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -83,27 +83,46 @@ struct ipc_proc_iface { */ static int __init ipc_init(void) { - sem_init(); - msg_init(); + int err_sem, err_msg; + + err_sem = sem_init(); + WARN(err_sem, "ipc: sysv sem_init failed: %d\n", err_sem); + err_msg = msg_init(); + WARN(err_msg, "ipc: sysv msg_init failed: %d\n", err_msg); shm_init(); - return 0; + + return err_msg ? err_msg : err_sem; } device_initcall(ipc_init); +static const struct rhashtable_params ipc_kht_params = { + .head_offset = offsetof(struct kern_ipc_perm, khtnode), + .key_offset = offsetof(struct kern_ipc_perm, key), + .key_len = FIELD_SIZEOF(struct kern_ipc_perm, key), + .locks_mul = 1, + .automatic_shrinking = true, +}; + /** * ipc_init_ids - initialise ipc identifiers * @ids: ipc identifier set * * Set up the sequence range to use for the ipc identifier range (limited - * below IPCMNI) then initialise the ids idr. + * below IPCMNI) then initialise the keys hashtable and ids idr. */ -void ipc_init_ids(struct ipc_ids *ids) +int ipc_init_ids(struct ipc_ids *ids) { + int err; ids->in_use = 0; ids->seq = 0; ids->next_id = -1; init_rwsem(&ids->rwsem); + err = rhashtable_init(&ids->key_ht, &ipc_kht_params); + if (err) + return err; idr_init(&ids->ipcs_idr); + ids->tables_initialized = true; + return 0; } #ifdef CONFIG_PROC_FS @@ -147,28 +166,20 @@ void __init ipc_init_proc_interface(const char *path, const char *header, * Returns the locked pointer to the ipc structure if found or NULL * otherwise. If key is found ipc points to the owning ipc structure * - * Called with ipc_ids.rwsem held. + * Called with writer ipc_ids.rwsem held. */ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) { - struct kern_ipc_perm *ipc; - int next_id; - int total; - - for (total = 0, next_id = 0; total < ids->in_use; next_id++) { - ipc = idr_find(&ids->ipcs_idr, next_id); - - if (ipc == NULL) - continue; + struct kern_ipc_perm *ipcp = NULL; - if (ipc->key != key) { - total++; - continue; - } + if (likely(ids->tables_initialized)) + ipcp = rhashtable_lookup_fast(&ids->key_ht, &key, + ipc_kht_params); + if (ipcp) { rcu_read_lock(); - ipc_lock_object(ipc); - return ipc; + ipc_lock_object(ipcp); + return ipcp; } return NULL; @@ -221,18 +232,18 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) { kuid_t euid; kgid_t egid; - int id; + int id, err; int next_id = ids->next_id; if (size > IPCMNI) size = IPCMNI; - if (ids->in_use >= size) + if (!ids->tables_initialized || ids->in_use >= size) return -ENOSPC; idr_preload(GFP_KERNEL); - atomic_set(&new->refcount, 1); + refcount_set(&new->refcount, 1); spin_lock_init(&new->lock); new->deleted = false; rcu_read_lock(); @@ -246,6 +257,15 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0, GFP_NOWAIT); idr_preload_end(); + + if (id >= 0 && new->key != IPC_PRIVATE) { + err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode, + ipc_kht_params); + if (err < 0) { + idr_remove(&ids->ipcs_idr, id); + id = err; + } + } if (id < 0) { spin_unlock(&new->lock); rcu_read_unlock(); @@ -377,6 +397,20 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, return err; } +/** + * ipc_kht_remove - remove an ipc from the key hashtable + * @ids: ipc identifier set + * @ipcp: ipc perm structure containing the key to remove + * + * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held + * before this function is called, and remain locked on the exit. + */ +static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) +{ + if (ipcp->key != IPC_PRIVATE) + rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, + ipc_kht_params); +} /** * ipc_rmid - remove an ipc identifier @@ -391,19 +425,34 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) int lid = ipcid_to_idx(ipcp->id); idr_remove(&ids->ipcs_idr, lid); + ipc_kht_remove(ids, ipcp); ids->in_use--; ipcp->deleted = true; } +/** + * ipc_set_key_private - switch the key of an existing ipc to IPC_PRIVATE + * @ids: ipc identifier set + * @ipcp: ipc perm structure containing the key to modify + * + * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held + * before this function is called, and remain locked on the exit. + */ +void ipc_set_key_private(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) +{ + ipc_kht_remove(ids, ipcp); + ipcp->key = IPC_PRIVATE; +} + int ipc_rcu_getref(struct kern_ipc_perm *ptr) { - return atomic_inc_not_zero(&ptr->refcount); + return refcount_inc_not_zero(&ptr->refcount); } void ipc_rcu_putref(struct kern_ipc_perm *ptr, void (*func)(struct rcu_head *head)) { - if (!atomic_dec_and_test(&ptr->refcount)) + if (!refcount_dec_and_test(&ptr->refcount)) return; call_rcu(&ptr->rcu, func); @@ -485,7 +534,7 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out) } /** - * ipc_obtain_object + * ipc_obtain_object_idr * @ids: ipc identifier set * @id: ipc id to look for * @@ -499,6 +548,9 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id) struct kern_ipc_perm *out; int lid = ipcid_to_idx(id); + if (unlikely(!ids->tables_initialized)) + return ERR_PTR(-EINVAL); + out = idr_find(&ids->ipcs_idr, lid); if (!out) return ERR_PTR(-EINVAL); diff --git a/ipc/util.h b/ipc/util.h index c692010e6f0a394d1d796e33fda3b6b5e46ca16f..80c9f51c3f07bed4bad42edec9f71177d69ff1e4 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -15,8 +15,8 @@ #define SEQ_MULTIPLIER (IPCMNI) -void sem_init(void); -void msg_init(void); +int sem_init(void); +int msg_init(void); void shm_init(void); struct ipc_namespace; @@ -30,17 +30,17 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { } #endif #ifdef CONFIG_SYSVIPC -void sem_init_ns(struct ipc_namespace *ns); -void msg_init_ns(struct ipc_namespace *ns); -void shm_init_ns(struct ipc_namespace *ns); +int sem_init_ns(struct ipc_namespace *ns); +int msg_init_ns(struct ipc_namespace *ns); +int shm_init_ns(struct ipc_namespace *ns); void sem_exit_ns(struct ipc_namespace *ns); void msg_exit_ns(struct ipc_namespace *ns); void shm_exit_ns(struct ipc_namespace *ns); #else -static inline void sem_init_ns(struct ipc_namespace *ns) { } -static inline void msg_init_ns(struct ipc_namespace *ns) { } -static inline void shm_init_ns(struct ipc_namespace *ns) { } +static inline int sem_init_ns(struct ipc_namespace *ns) { return 0; } +static inline int msg_init_ns(struct ipc_namespace *ns) { return 0; } +static inline int shm_init_ns(struct ipc_namespace *ns) { return 0; } static inline void sem_exit_ns(struct ipc_namespace *ns) { } static inline void msg_exit_ns(struct ipc_namespace *ns) { } @@ -79,7 +79,7 @@ struct ipc_ops { struct seq_file; struct ipc_ids; -void ipc_init_ids(struct ipc_ids *); +int ipc_init_ids(struct ipc_ids *); #ifdef CONFIG_PROC_FS void __init ipc_init_proc_interface(const char *path, const char *header, int ids, int (*show)(struct seq_file *, void *)); @@ -104,6 +104,9 @@ int ipc_get_maxid(struct ipc_ids *); /* must be called with both locks acquired. */ void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *); +/* must be called with both locks acquired. */ +void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *); + /* must be called with ipcp locked */ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg); diff --git a/kernel/Makefile b/kernel/Makefile index 9c323a6daa469ccff04771489ba17ee31183c03b..ed470aac53da1e3b47bd98517cb67deefd853588 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,12 +5,13 @@ obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o user.o \ - signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ + signal.o sys.o umh.o workqueue.o pid.o task_work.o \ extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o smpboot.o ucount.o +obj-$(CONFIG_MODULES) += kmod.o obj-$(CONFIG_MULTIUSER) += groups.o ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/fork.c b/kernel/fork.c index 24a4c0be80d5abb30d5ba65f3b13eb05ff2d7690..6f1b0af00bdadb311503544906fbcc8c956fae7b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -824,6 +825,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_owner(mm, p); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_mm_init(mm); + hmm_mm_init(mm); init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; @@ -903,6 +905,7 @@ void __mmdrop(struct mm_struct *mm) BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); + hmm_mm_destroy(mm); mmu_notifier_mm_destroy(mm); check_mm(mm); put_user_ns(mm->user_ns); @@ -1459,8 +1462,7 @@ static void rt_mutex_init_task(struct task_struct *p) { raw_spin_lock_init(&p->pi_lock); #ifdef CONFIG_RT_MUTEXES - p->pi_waiters = RB_ROOT; - p->pi_waiters_leftmost = NULL; + p->pi_waiters = RB_ROOT_CACHED; p->pi_top_task = NULL; p->pi_blocked_on = NULL; #endif diff --git a/kernel/kcov.c b/kernel/kcov.c index cd771993f96f4a7ff23592f1abaea22c71444fda..3f693a0f6f3edc9e563b681ad47bf168e73d0dca 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -270,6 +270,7 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) static const struct file_operations kcov_fops = { .open = kcov_open, .unlocked_ioctl = kcov_ioctl, + .compat_ioctl = kcov_ioctl, .mmap = kcov_mmap, .release = kcov_close, }; diff --git a/kernel/kmod.c b/kernel/kmod.c index 2f37acde640b6c6e2ce73933a52dbad65f014cf3..bc6addd9152b4db58c57757f1fe0a2911a16e459 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -1,23 +1,6 @@ /* - kmod, the new module loader (replaces kerneld) - Kirk Petersen - - Reorganized not to be a daemon by Adam Richter, with guidance - from Greg Zornetzer. - - Modified to avoid chroot and file sharing problems. - Mikael Pettersson - - Limit the concurrent number of kmod modprobes to catch loops from - "modprobe needs a service that is in a module". - Keith Owens December 1999 - - Unblock all signals when we exec a usermode process. - Shuu Yamaguchi December 2000 - - call_usermodehelper wait flag, and remove exec_usermodehelper. - Rusty Russell Jan 2003 -*/ + * kmod - the kernel module loader + */ #include #include #include @@ -45,15 +28,6 @@ #include -#define CAP_BSET (void *)1 -#define CAP_PI (void *)2 - -static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; -static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; -static DEFINE_SPINLOCK(umh_sysctl_lock); -static DECLARE_RWSEM(umhelper_sem); - -#ifdef CONFIG_MODULES /* * Assuming: * @@ -202,536 +176,3 @@ int __request_module(bool wait, const char *fmt, ...) return ret; } EXPORT_SYMBOL(__request_module); - -#endif /* CONFIG_MODULES */ - -static void call_usermodehelper_freeinfo(struct subprocess_info *info) -{ - if (info->cleanup) - (*info->cleanup)(info); - kfree(info); -} - -static void umh_complete(struct subprocess_info *sub_info) -{ - struct completion *comp = xchg(&sub_info->complete, NULL); - /* - * See call_usermodehelper_exec(). If xchg() returns NULL - * we own sub_info, the UMH_KILLABLE caller has gone away - * or the caller used UMH_NO_WAIT. - */ - if (comp) - complete(comp); - else - call_usermodehelper_freeinfo(sub_info); -} - -/* - * This is the task which runs the usermode application - */ -static int call_usermodehelper_exec_async(void *data) -{ - struct subprocess_info *sub_info = data; - struct cred *new; - int retval; - - spin_lock_irq(¤t->sighand->siglock); - flush_signal_handlers(current, 1); - spin_unlock_irq(¤t->sighand->siglock); - - /* - * Our parent (unbound workqueue) runs with elevated scheduling - * priority. Avoid propagating that into the userspace child. - */ - set_user_nice(current, 0); - - retval = -ENOMEM; - new = prepare_kernel_cred(current); - if (!new) - goto out; - - spin_lock(&umh_sysctl_lock); - new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); - new->cap_inheritable = cap_intersect(usermodehelper_inheritable, - new->cap_inheritable); - spin_unlock(&umh_sysctl_lock); - - if (sub_info->init) { - retval = sub_info->init(sub_info, new); - if (retval) { - abort_creds(new); - goto out; - } - } - - commit_creds(new); - - retval = do_execve(getname_kernel(sub_info->path), - (const char __user *const __user *)sub_info->argv, - (const char __user *const __user *)sub_info->envp); -out: - sub_info->retval = retval; - /* - * call_usermodehelper_exec_sync() will call umh_complete - * if UHM_WAIT_PROC. - */ - if (!(sub_info->wait & UMH_WAIT_PROC)) - umh_complete(sub_info); - if (!retval) - return 0; - do_exit(0); -} - -/* Handles UMH_WAIT_PROC. */ -static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) -{ - pid_t pid; - - /* If SIGCLD is ignored sys_wait4 won't populate the status. */ - kernel_sigaction(SIGCHLD, SIG_DFL); - pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); - if (pid < 0) { - sub_info->retval = pid; - } else { - int ret = -ECHILD; - /* - * Normally it is bogus to call wait4() from in-kernel because - * wait4() wants to write the exit code to a userspace address. - * But call_usermodehelper_exec_sync() always runs as kernel - * thread (workqueue) and put_user() to a kernel address works - * OK for kernel threads, due to their having an mm_segment_t - * which spans the entire address space. - * - * Thus the __user pointer cast is valid here. - */ - sys_wait4(pid, (int __user *)&ret, 0, NULL); - - /* - * If ret is 0, either call_usermodehelper_exec_async failed and - * the real error code is already in sub_info->retval or - * sub_info->retval is 0 anyway, so don't mess with it then. - */ - if (ret) - sub_info->retval = ret; - } - - /* Restore default kernel sig handler */ - kernel_sigaction(SIGCHLD, SIG_IGN); - - umh_complete(sub_info); -} - -/* - * We need to create the usermodehelper kernel thread from a task that is affine - * to an optimized set of CPUs (or nohz housekeeping ones) such that they - * inherit a widest affinity irrespective of call_usermodehelper() callers with - * possibly reduced affinity (eg: per-cpu workqueues). We don't want - * usermodehelper targets to contend a busy CPU. - * - * Unbound workqueues provide such wide affinity and allow to block on - * UMH_WAIT_PROC requests without blocking pending request (up to some limit). - * - * Besides, workqueues provide the privilege level that caller might not have - * to perform the usermodehelper request. - * - */ -static void call_usermodehelper_exec_work(struct work_struct *work) -{ - struct subprocess_info *sub_info = - container_of(work, struct subprocess_info, work); - - if (sub_info->wait & UMH_WAIT_PROC) { - call_usermodehelper_exec_sync(sub_info); - } else { - pid_t pid; - /* - * Use CLONE_PARENT to reparent it to kthreadd; we do not - * want to pollute current->children, and we need a parent - * that always ignores SIGCHLD to ensure auto-reaping. - */ - pid = kernel_thread(call_usermodehelper_exec_async, sub_info, - CLONE_PARENT | SIGCHLD); - if (pid < 0) { - sub_info->retval = pid; - umh_complete(sub_info); - } - } -} - -/* - * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY - * (used for preventing user land processes from being created after the user - * land has been frozen during a system-wide hibernation or suspend operation). - * Should always be manipulated under umhelper_sem acquired for write. - */ -static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED; - -/* Number of helpers running */ -static atomic_t running_helpers = ATOMIC_INIT(0); - -/* - * Wait queue head used by usermodehelper_disable() to wait for all running - * helpers to finish. - */ -static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); - -/* - * Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled - * to become 'false'. - */ -static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq); - -/* - * Time to wait for running_helpers to become zero before the setting of - * usermodehelper_disabled in usermodehelper_disable() fails - */ -#define RUNNING_HELPERS_TIMEOUT (5 * HZ) - -int usermodehelper_read_trylock(void) -{ - DEFINE_WAIT(wait); - int ret = 0; - - down_read(&umhelper_sem); - for (;;) { - prepare_to_wait(&usermodehelper_disabled_waitq, &wait, - TASK_INTERRUPTIBLE); - if (!usermodehelper_disabled) - break; - - if (usermodehelper_disabled == UMH_DISABLED) - ret = -EAGAIN; - - up_read(&umhelper_sem); - - if (ret) - break; - - schedule(); - try_to_freeze(); - - down_read(&umhelper_sem); - } - finish_wait(&usermodehelper_disabled_waitq, &wait); - return ret; -} -EXPORT_SYMBOL_GPL(usermodehelper_read_trylock); - -long usermodehelper_read_lock_wait(long timeout) -{ - DEFINE_WAIT(wait); - - if (timeout < 0) - return -EINVAL; - - down_read(&umhelper_sem); - for (;;) { - prepare_to_wait(&usermodehelper_disabled_waitq, &wait, - TASK_UNINTERRUPTIBLE); - if (!usermodehelper_disabled) - break; - - up_read(&umhelper_sem); - - timeout = schedule_timeout(timeout); - if (!timeout) - break; - - down_read(&umhelper_sem); - } - finish_wait(&usermodehelper_disabled_waitq, &wait); - return timeout; -} -EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait); - -void usermodehelper_read_unlock(void) -{ - up_read(&umhelper_sem); -} -EXPORT_SYMBOL_GPL(usermodehelper_read_unlock); - -/** - * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled. - * @depth: New value to assign to usermodehelper_disabled. - * - * Change the value of usermodehelper_disabled (under umhelper_sem locked for - * writing) and wakeup tasks waiting for it to change. - */ -void __usermodehelper_set_disable_depth(enum umh_disable_depth depth) -{ - down_write(&umhelper_sem); - usermodehelper_disabled = depth; - wake_up(&usermodehelper_disabled_waitq); - up_write(&umhelper_sem); -} - -/** - * __usermodehelper_disable - Prevent new helpers from being started. - * @depth: New value to assign to usermodehelper_disabled. - * - * Set usermodehelper_disabled to @depth and wait for running helpers to exit. - */ -int __usermodehelper_disable(enum umh_disable_depth depth) -{ - long retval; - - if (!depth) - return -EINVAL; - - down_write(&umhelper_sem); - usermodehelper_disabled = depth; - up_write(&umhelper_sem); - - /* - * From now on call_usermodehelper_exec() won't start any new - * helpers, so it is sufficient if running_helpers turns out to - * be zero at one point (it may be increased later, but that - * doesn't matter). - */ - retval = wait_event_timeout(running_helpers_waitq, - atomic_read(&running_helpers) == 0, - RUNNING_HELPERS_TIMEOUT); - if (retval) - return 0; - - __usermodehelper_set_disable_depth(UMH_ENABLED); - return -EAGAIN; -} - -static void helper_lock(void) -{ - atomic_inc(&running_helpers); - smp_mb__after_atomic(); -} - -static void helper_unlock(void) -{ - if (atomic_dec_and_test(&running_helpers)) - wake_up(&running_helpers_waitq); -} - -/** - * call_usermodehelper_setup - prepare to call a usermode helper - * @path: path to usermode executable - * @argv: arg vector for process - * @envp: environment for process - * @gfp_mask: gfp mask for memory allocation - * @cleanup: a cleanup function - * @init: an init function - * @data: arbitrary context sensitive data - * - * Returns either %NULL on allocation failure, or a subprocess_info - * structure. This should be passed to call_usermodehelper_exec to - * exec the process and free the structure. - * - * The init function is used to customize the helper process prior to - * exec. A non-zero return code causes the process to error out, exit, - * and return the failure to the calling process - * - * The cleanup function is just before ethe subprocess_info is about to - * be freed. This can be used for freeing the argv and envp. The - * Function must be runnable in either a process context or the - * context in which call_usermodehelper_exec is called. - */ -struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, - char **envp, gfp_t gfp_mask, - int (*init)(struct subprocess_info *info, struct cred *new), - void (*cleanup)(struct subprocess_info *info), - void *data) -{ - struct subprocess_info *sub_info; - sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); - if (!sub_info) - goto out; - - INIT_WORK(&sub_info->work, call_usermodehelper_exec_work); - -#ifdef CONFIG_STATIC_USERMODEHELPER - sub_info->path = CONFIG_STATIC_USERMODEHELPER_PATH; -#else - sub_info->path = path; -#endif - sub_info->argv = argv; - sub_info->envp = envp; - - sub_info->cleanup = cleanup; - sub_info->init = init; - sub_info->data = data; - out: - return sub_info; -} -EXPORT_SYMBOL(call_usermodehelper_setup); - -/** - * call_usermodehelper_exec - start a usermode application - * @sub_info: information about the subprocessa - * @wait: wait for the application to finish and return status. - * when UMH_NO_WAIT don't wait at all, but you get no useful error back - * when the program couldn't be exec'ed. This makes it safe to call - * from interrupt context. - * - * Runs a user-space application. The application is started - * asynchronously if wait is not set, and runs as a child of system workqueues. - * (ie. it runs with full root capabilities and optimized affinity). - */ -int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) -{ - DECLARE_COMPLETION_ONSTACK(done); - int retval = 0; - - if (!sub_info->path) { - call_usermodehelper_freeinfo(sub_info); - return -EINVAL; - } - helper_lock(); - if (usermodehelper_disabled) { - retval = -EBUSY; - goto out; - } - - /* - * If there is no binary for us to call, then just return and get out of - * here. This allows us to set STATIC_USERMODEHELPER_PATH to "" and - * disable all call_usermodehelper() calls. - */ - if (strlen(sub_info->path) == 0) - goto out; - - /* - * Set the completion pointer only if there is a waiter. - * This makes it possible to use umh_complete to free - * the data structure in case of UMH_NO_WAIT. - */ - sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done; - sub_info->wait = wait; - - queue_work(system_unbound_wq, &sub_info->work); - if (wait == UMH_NO_WAIT) /* task has freed sub_info */ - goto unlock; - - if (wait & UMH_KILLABLE) { - retval = wait_for_completion_killable(&done); - if (!retval) - goto wait_done; - - /* umh_complete() will see NULL and free sub_info */ - if (xchg(&sub_info->complete, NULL)) - goto unlock; - /* fallthrough, umh_complete() was already called */ - } - - wait_for_completion(&done); -wait_done: - retval = sub_info->retval; -out: - call_usermodehelper_freeinfo(sub_info); -unlock: - helper_unlock(); - return retval; -} -EXPORT_SYMBOL(call_usermodehelper_exec); - -/** - * call_usermodehelper() - prepare and start a usermode application - * @path: path to usermode executable - * @argv: arg vector for process - * @envp: environment for process - * @wait: wait for the application to finish and return status. - * when UMH_NO_WAIT don't wait at all, but you get no useful error back - * when the program couldn't be exec'ed. This makes it safe to call - * from interrupt context. - * - * This function is the equivalent to use call_usermodehelper_setup() and - * call_usermodehelper_exec(). - */ -int call_usermodehelper(const char *path, char **argv, char **envp, int wait) -{ - struct subprocess_info *info; - gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; - - info = call_usermodehelper_setup(path, argv, envp, gfp_mask, - NULL, NULL, NULL); - if (info == NULL) - return -ENOMEM; - - return call_usermodehelper_exec(info, wait); -} -EXPORT_SYMBOL(call_usermodehelper); - -static int proc_cap_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table t; - unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; - kernel_cap_t new_cap; - int err, i; - - if (write && (!capable(CAP_SETPCAP) || - !capable(CAP_SYS_MODULE))) - return -EPERM; - - /* - * convert from the global kernel_cap_t to the ulong array to print to - * userspace if this is a read. - */ - spin_lock(&umh_sysctl_lock); - for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) { - if (table->data == CAP_BSET) - cap_array[i] = usermodehelper_bset.cap[i]; - else if (table->data == CAP_PI) - cap_array[i] = usermodehelper_inheritable.cap[i]; - else - BUG(); - } - spin_unlock(&umh_sysctl_lock); - - t = *table; - t.data = &cap_array; - - /* - * actually read or write and array of ulongs from userspace. Remember - * these are least significant 32 bits first - */ - err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); - if (err < 0) - return err; - - /* - * convert from the sysctl array of ulongs to the kernel_cap_t - * internal representation - */ - for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) - new_cap.cap[i] = cap_array[i]; - - /* - * Drop everything not in the new_cap (but don't add things) - */ - spin_lock(&umh_sysctl_lock); - if (write) { - if (table->data == CAP_BSET) - usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); - if (table->data == CAP_PI) - usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); - } - spin_unlock(&umh_sysctl_lock); - - return 0; -} - -struct ctl_table usermodehelper_table[] = { - { - .procname = "bset", - .data = CAP_BSET, - .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), - .mode = 0600, - .proc_handler = proc_cap_handler, - }, - { - .procname = "inheritable", - .data = CAP_PI, - .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), - .mode = 0600, - .proc_handler = proc_cap_handler, - }, - { } -}; diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index ac35e648b0e519ffc0a96a54d85f57fd1c0d2c1d..f4a74e78d4678f767f8d18d801db43dfeb92cece 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -58,7 +58,7 @@ static void printk_lock(struct rt_mutex *lock, int print_owner) void rt_mutex_debug_task_free(struct task_struct *task) { - DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters)); + DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters.rb_root)); DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 649dc9d3951a5fed57930769957552841528f784..6f3dba6e4e9e14332dfb02039dd720f84d0348b0 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -271,10 +271,10 @@ rt_mutex_waiter_equal(struct rt_mutex_waiter *left, static void rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) { - struct rb_node **link = &lock->waiters.rb_node; + struct rb_node **link = &lock->waiters.rb_root.rb_node; struct rb_node *parent = NULL; struct rt_mutex_waiter *entry; - int leftmost = 1; + bool leftmost = true; while (*link) { parent = *link; @@ -283,15 +283,12 @@ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) link = &parent->rb_left; } else { link = &parent->rb_right; - leftmost = 0; + leftmost = false; } } - if (leftmost) - lock->waiters_leftmost = &waiter->tree_entry; - rb_link_node(&waiter->tree_entry, parent, link); - rb_insert_color(&waiter->tree_entry, &lock->waiters); + rb_insert_color_cached(&waiter->tree_entry, &lock->waiters, leftmost); } static void @@ -300,20 +297,17 @@ rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) if (RB_EMPTY_NODE(&waiter->tree_entry)) return; - if (lock->waiters_leftmost == &waiter->tree_entry) - lock->waiters_leftmost = rb_next(&waiter->tree_entry); - - rb_erase(&waiter->tree_entry, &lock->waiters); + rb_erase_cached(&waiter->tree_entry, &lock->waiters); RB_CLEAR_NODE(&waiter->tree_entry); } static void rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) { - struct rb_node **link = &task->pi_waiters.rb_node; + struct rb_node **link = &task->pi_waiters.rb_root.rb_node; struct rb_node *parent = NULL; struct rt_mutex_waiter *entry; - int leftmost = 1; + bool leftmost = true; while (*link) { parent = *link; @@ -322,15 +316,12 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) link = &parent->rb_left; } else { link = &parent->rb_right; - leftmost = 0; + leftmost = false; } } - if (leftmost) - task->pi_waiters_leftmost = &waiter->pi_tree_entry; - rb_link_node(&waiter->pi_tree_entry, parent, link); - rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters); + rb_insert_color_cached(&waiter->pi_tree_entry, &task->pi_waiters, leftmost); } static void @@ -339,10 +330,7 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) if (RB_EMPTY_NODE(&waiter->pi_tree_entry)) return; - if (task->pi_waiters_leftmost == &waiter->pi_tree_entry) - task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry); - - rb_erase(&waiter->pi_tree_entry, &task->pi_waiters); + rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters); RB_CLEAR_NODE(&waiter->pi_tree_entry); } @@ -1657,8 +1645,7 @@ void __rt_mutex_init(struct rt_mutex *lock, const char *name, { lock->owner = NULL; raw_spin_lock_init(&lock->wait_lock); - lock->waiters = RB_ROOT; - lock->waiters_leftmost = NULL; + lock->waiters = RB_ROOT_CACHED; if (name && key) debug_rt_mutex_init(lock, name, key); diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 8d039b928d610932206f49babe739bacda5b0c15..7453be0485a5e85738aa6de657c111d85494e08b 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -45,7 +45,7 @@ struct rt_mutex_waiter { static inline int rt_mutex_has_waiters(struct rt_mutex *lock) { - return !RB_EMPTY_ROOT(&lock->waiters); + return !RB_EMPTY_ROOT(&lock->waiters.rb_root); } static inline struct rt_mutex_waiter * @@ -53,8 +53,8 @@ rt_mutex_top_waiter(struct rt_mutex *lock) { struct rt_mutex_waiter *w; - w = rb_entry(lock->waiters_leftmost, struct rt_mutex_waiter, - tree_entry); + w = rb_entry(lock->waiters.rb_leftmost, + struct rt_mutex_waiter, tree_entry); BUG_ON(w->lock != lock); return w; @@ -62,14 +62,14 @@ rt_mutex_top_waiter(struct rt_mutex *lock) static inline int task_has_pi_waiters(struct task_struct *p) { - return !RB_EMPTY_ROOT(&p->pi_waiters); + return !RB_EMPTY_ROOT(&p->pi_waiters.rb_root); } static inline struct rt_mutex_waiter * task_top_pi_waiter(struct task_struct *p) { - return rb_entry(p->pi_waiters_leftmost, struct rt_mutex_waiter, - pi_tree_entry); + return rb_entry(p->pi_waiters.rb_leftmost, + struct rt_mutex_waiter, pi_tree_entry); } #else diff --git a/kernel/memremap.c b/kernel/memremap.c index 066e73c2fcc9dbbd89c13923e203ed47aaff7d58..6bcbfbf1a8fdfd2f1008cde707db9a798a68cdc6 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -11,13 +11,14 @@ * General Public License for more details. */ #include -#include #include #include #include #include #include #include +#include +#include #ifndef ioremap_cache /* temporary while we convert existing ioremap_cache users to memremap */ @@ -219,6 +220,34 @@ static unsigned long order_at(struct resource *res, unsigned long pgoff) for (pgoff = 0, order = order_at((res), pgoff); order < ULONG_MAX; \ pgoff += 1UL << order, order = order_at((res), pgoff)) +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) +int device_private_entry_fault(struct vm_area_struct *vma, + unsigned long addr, + swp_entry_t entry, + unsigned int flags, + pmd_t *pmdp) +{ + struct page *page = device_private_entry_to_page(entry); + + /* + * The page_fault() callback must migrate page back to system memory + * so that CPU can access it. This might fail for various reasons + * (device issue, device was unsafely unplugged, ...). When such + * error conditions happen, the callback must return VM_FAULT_SIGBUS. + * + * Note that because memory cgroup charges are accounted to the device + * memory, this should never fail because of memory restrictions (but + * allocation of regular system page might still fail because we are + * out of memory). + * + * There is a more in-depth description of what that callback can and + * cannot do, in include/linux/memremap.h + */ + return page->pgmap->page_fault(vma, addr, page, flags, pmdp); +} +EXPORT_SYMBOL(device_private_entry_fault); +#endif /* CONFIG_DEVICE_PRIVATE */ + static void pgmap_radix_release(struct resource *res) { unsigned long pgoff, order; @@ -356,6 +385,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, } pgmap->ref = ref; pgmap->res = &page_map->res; + pgmap->type = MEMORY_DEVICE_HOST; + pgmap->page_fault = NULL; + pgmap->page_free = NULL; + pgmap->data = NULL; mutex_lock(&pgmap_lock); error = 0; @@ -466,3 +499,28 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) return pgmap ? pgmap->altmap : NULL; } #endif /* CONFIG_ZONE_DEVICE */ + + +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) +void put_zone_device_private_or_public_page(struct page *page) +{ + int count = page_ref_dec_return(page); + + /* + * If refcount is 1 then page is freed and refcount is stable as nobody + * holds a reference on the page. + */ + if (count == 1) { + /* Clear Active bit in case of parallel mark_page_accessed */ + __ClearPageActive(page); + __ClearPageWaiters(page); + + page->mapping = NULL; + mem_cgroup_uncharge(page); + + page->pgmap->page_free(page, page->pgmap->data); + } else if (!count) + __put_page(page); +} +EXPORT_SYMBOL(put_zone_device_private_or_public_page); +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 84fe96641b2e05f8c126d9716685a6a237d704fe..1250e4bd4b85e690c7db2b061a87599e8651a606 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4091,7 +4091,7 @@ static void __init rcu_init_geometry(void) if (rcu_fanout_leaf == RCU_FANOUT_LEAF && nr_cpu_ids == NR_CPUS) return; - pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n", + pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n", rcu_fanout_leaf, nr_cpu_ids); /* diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 55bde94b95728bae7eb1e9b4ead94e18dfa0e4e8..e012b9be777e3ba00ccf2fe4d05df78b78abd8f5 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -89,7 +89,7 @@ static void __init rcu_bootup_announce_oddness(void) if (rcu_fanout_leaf != RCU_FANOUT_LEAF) pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) - pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); + pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%u.\n", NR_CPUS, nr_cpu_ids); #ifdef CONFIG_RCU_BOOST pr_info("\tRCU priority boosting: priority %d delay %d ms.\n", kthread_prio, CONFIG_RCU_BOOST_DELAY); #endif diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 9e38df7649f49dafd2b6466ca83141571627225f..0191ec7667c326d603ed058f9a393af0436b7c76 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -296,7 +296,7 @@ static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq) { struct sched_dl_entity *dl_se = &p->dl; - return dl_rq->rb_leftmost == &dl_se->rb_node; + return dl_rq->root.rb_leftmost == &dl_se->rb_node; } void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime) @@ -320,7 +320,7 @@ void init_dl_bw(struct dl_bw *dl_b) void init_dl_rq(struct dl_rq *dl_rq) { - dl_rq->rb_root = RB_ROOT; + dl_rq->root = RB_ROOT_CACHED; #ifdef CONFIG_SMP /* zero means no -deadline tasks */ @@ -328,7 +328,7 @@ void init_dl_rq(struct dl_rq *dl_rq) dl_rq->dl_nr_migratory = 0; dl_rq->overloaded = 0; - dl_rq->pushable_dl_tasks_root = RB_ROOT; + dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED; #else init_dl_bw(&dl_rq->dl_bw); #endif @@ -410,10 +410,10 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) { struct dl_rq *dl_rq = &rq->dl; - struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node; + struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_root.rb_node; struct rb_node *parent = NULL; struct task_struct *entry; - int leftmost = 1; + bool leftmost = true; BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks)); @@ -425,17 +425,16 @@ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) link = &parent->rb_left; else { link = &parent->rb_right; - leftmost = 0; + leftmost = false; } } - if (leftmost) { - dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks; + if (leftmost) dl_rq->earliest_dl.next = p->dl.deadline; - } rb_link_node(&p->pushable_dl_tasks, parent, link); - rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); + rb_insert_color_cached(&p->pushable_dl_tasks, + &dl_rq->pushable_dl_tasks_root, leftmost); } static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) @@ -445,24 +444,23 @@ static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) if (RB_EMPTY_NODE(&p->pushable_dl_tasks)) return; - if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) { + if (dl_rq->pushable_dl_tasks_root.rb_leftmost == &p->pushable_dl_tasks) { struct rb_node *next_node; next_node = rb_next(&p->pushable_dl_tasks); - dl_rq->pushable_dl_tasks_leftmost = next_node; if (next_node) { dl_rq->earliest_dl.next = rb_entry(next_node, struct task_struct, pushable_dl_tasks)->dl.deadline; } } - rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); + rb_erase_cached(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); RB_CLEAR_NODE(&p->pushable_dl_tasks); } static inline int has_pushable_dl_tasks(struct rq *rq) { - return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root); + return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root); } static int push_dl_task(struct rq *rq); @@ -1266,7 +1264,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) dl_rq->earliest_dl.next = 0; cpudl_clear(&rq->rd->cpudl, rq->cpu); } else { - struct rb_node *leftmost = dl_rq->rb_leftmost; + struct rb_node *leftmost = dl_rq->root.rb_leftmost; struct sched_dl_entity *entry; entry = rb_entry(leftmost, struct sched_dl_entity, rb_node); @@ -1313,7 +1311,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) { struct dl_rq *dl_rq = dl_rq_of_se(dl_se); - struct rb_node **link = &dl_rq->rb_root.rb_node; + struct rb_node **link = &dl_rq->root.rb_root.rb_node; struct rb_node *parent = NULL; struct sched_dl_entity *entry; int leftmost = 1; @@ -1331,11 +1329,8 @@ static void __enqueue_dl_entity(struct sched_dl_entity *dl_se) } } - if (leftmost) - dl_rq->rb_leftmost = &dl_se->rb_node; - rb_link_node(&dl_se->rb_node, parent, link); - rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root); + rb_insert_color_cached(&dl_se->rb_node, &dl_rq->root, leftmost); inc_dl_tasks(dl_se, dl_rq); } @@ -1347,14 +1342,7 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se) if (RB_EMPTY_NODE(&dl_se->rb_node)) return; - if (dl_rq->rb_leftmost == &dl_se->rb_node) { - struct rb_node *next_node; - - next_node = rb_next(&dl_se->rb_node); - dl_rq->rb_leftmost = next_node; - } - - rb_erase(&dl_se->rb_node, &dl_rq->rb_root); + rb_erase_cached(&dl_se->rb_node, &dl_rq->root); RB_CLEAR_NODE(&dl_se->rb_node); dec_dl_tasks(dl_se, dl_rq); @@ -1647,7 +1635,7 @@ static void start_hrtick_dl(struct rq *rq, struct task_struct *p) static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, struct dl_rq *dl_rq) { - struct rb_node *left = dl_rq->rb_leftmost; + struct rb_node *left = rb_first_cached(&dl_rq->root); if (!left) return NULL; @@ -1771,7 +1759,7 @@ static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) */ static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu) { - struct rb_node *next_node = rq->dl.pushable_dl_tasks_leftmost; + struct rb_node *next_node = rq->dl.pushable_dl_tasks_root.rb_leftmost; struct task_struct *p = NULL; if (!has_pushable_dl_tasks(rq)) @@ -1945,7 +1933,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) if (!has_pushable_dl_tasks(rq)) return NULL; - p = rb_entry(rq->dl.pushable_dl_tasks_leftmost, + p = rb_entry(rq->dl.pushable_dl_tasks_root.rb_leftmost, struct task_struct, pushable_dl_tasks); BUG_ON(rq->cpu != task_cpu(p)); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 4a23bbc3111bd287ce4437e6fa2de530f9a56406..8e536d963652c230e8f08f906d723826b068faaa 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -530,7 +530,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SPLIT_NS(cfs_rq->exec_clock)); raw_spin_lock_irqsave(&rq->lock, flags); - if (cfs_rq->rb_leftmost) + if (rb_first_cached(&cfs_rq->tasks_timeline)) MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime; last = __pick_last_entity(cfs_rq); if (last) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8bc0a883d19045145ad7b839c23ca2ec1dc96bfe..a5d83ed8dd824c180eede5643176d9274c672da7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -513,6 +513,7 @@ static inline int entity_before(struct sched_entity *a, static void update_min_vruntime(struct cfs_rq *cfs_rq) { struct sched_entity *curr = cfs_rq->curr; + struct rb_node *leftmost = rb_first_cached(&cfs_rq->tasks_timeline); u64 vruntime = cfs_rq->min_vruntime; @@ -523,10 +524,9 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) curr = NULL; } - if (cfs_rq->rb_leftmost) { - struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost, - struct sched_entity, - run_node); + if (leftmost) { /* non-empty tree */ + struct sched_entity *se; + se = rb_entry(leftmost, struct sched_entity, run_node); if (!curr) vruntime = se->vruntime; @@ -547,10 +547,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) */ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { - struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; + struct rb_node **link = &cfs_rq->tasks_timeline.rb_root.rb_node; struct rb_node *parent = NULL; struct sched_entity *entry; - int leftmost = 1; + bool leftmost = true; /* * Find the right place in the rbtree: @@ -566,36 +566,23 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) link = &parent->rb_left; } else { link = &parent->rb_right; - leftmost = 0; + leftmost = false; } } - /* - * Maintain a cache of leftmost tree entries (it is frequently - * used): - */ - if (leftmost) - cfs_rq->rb_leftmost = &se->run_node; - rb_link_node(&se->run_node, parent, link); - rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); + rb_insert_color_cached(&se->run_node, + &cfs_rq->tasks_timeline, leftmost); } static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { - if (cfs_rq->rb_leftmost == &se->run_node) { - struct rb_node *next_node; - - next_node = rb_next(&se->run_node); - cfs_rq->rb_leftmost = next_node; - } - - rb_erase(&se->run_node, &cfs_rq->tasks_timeline); + rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline); } struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) { - struct rb_node *left = cfs_rq->rb_leftmost; + struct rb_node *left = rb_first_cached(&cfs_rq->tasks_timeline); if (!left) return NULL; @@ -616,7 +603,7 @@ static struct sched_entity *__pick_next_entity(struct sched_entity *se) #ifdef CONFIG_SCHED_DEBUG struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) { - struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); + struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root); if (!last) return NULL; @@ -9312,7 +9299,7 @@ static void set_curr_task_fair(struct rq *rq) void init_cfs_rq(struct cfs_rq *cfs_rq) { - cfs_rq->tasks_timeline = RB_ROOT; + cfs_rq->tasks_timeline = RB_ROOT_CACHED; cfs_rq->min_vruntime = (u64)(-(1LL << 20)); #ifndef CONFIG_64BIT cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6ed7962dc89668f55b26f29c41f8af86ae30890a..746ac78ff4927d03081688d3f140d6de5d0e7d02 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -426,8 +426,7 @@ struct cfs_rq { u64 min_vruntime_copy; #endif - struct rb_root tasks_timeline; - struct rb_node *rb_leftmost; + struct rb_root_cached tasks_timeline; /* * 'curr' points to currently running entity on this cfs_rq. @@ -550,8 +549,7 @@ struct rt_rq { /* Deadline class' related fields in a runqueue */ struct dl_rq { /* runqueue is an rbtree, ordered by deadline */ - struct rb_root rb_root; - struct rb_node *rb_leftmost; + struct rb_root_cached root; unsigned long dl_nr_running; @@ -575,8 +573,7 @@ struct dl_rq { * an rb-tree, ordered by tasks' deadlines, with caching * of the leftmost (earliest deadline) element. */ - struct rb_root pushable_dl_tasks_root; - struct rb_node *pushable_dl_tasks_leftmost; + struct rb_root_cached pushable_dl_tasks_root; #else struct dl_bw dl_bw; #endif diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 6f7b43982f735d340948c6d2c1be41460ea72806..5d0062cc10cb8f99fffa3b8c1cc548f4329c46ed 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -473,7 +473,7 @@ static int __init isolated_cpu_setup(char *str) alloc_bootmem_cpumask_var(&cpu_isolated_map); ret = cpulist_parse(str, cpu_isolated_map); if (ret) { - pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids); + pr_err("sched: Error, all isolcpus= values must be between 0 and %u\n", nr_cpu_ids); return 0; } return 1; diff --git a/kernel/smp.c b/kernel/smp.c index 81cfca9b4cc3b37557b9e2c09602f3baf1f297a8..c94dd85c8d41798443a7080af55c24fe508815b3 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -550,7 +550,7 @@ static int __init maxcpus(char *str) early_param("maxcpus", maxcpus); /* Setup number of possible processor ids */ -int nr_cpu_ids __read_mostly = NR_CPUS; +unsigned int nr_cpu_ids __read_mostly = NR_CPUS; EXPORT_SYMBOL(nr_cpu_ids); /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8ea4fb31571928913781b75b70b75210a46b3d8d..2cafb49aa65e13b5b1d29223e8943d53386a795d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2316,7 +2316,7 @@ void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } EXPORT_SYMBOL(hardpps); -#endif +#endif /* CONFIG_NTP_PPS */ /** * xtime_update() - advances the timekeeping infrastructure diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d56123cdcc89027ee5841cf6573de474b38ebb39..b8f1f54731af0ab1190783291ffa1ca9aaf33feb 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -1543,7 +1543,7 @@ fs_initcall(init_graph_tracefs); static __init int init_graph_trace(void) { - max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); + max_bytes_for_cpu = snprintf(NULL, 0, "%u", nr_cpu_ids - 1); if (!register_trace_event(&graph_trace_entry_event)) { pr_warn("Warning: could not register graph trace events\n"); diff --git a/kernel/umh.c b/kernel/umh.c new file mode 100644 index 0000000000000000000000000000000000000000..6ff9905250ff0563713c64585a74e888de70f343 --- /dev/null +++ b/kernel/umh.c @@ -0,0 +1,568 @@ +/* + * umh - the kernel usermode helper + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define CAP_BSET (void *)1 +#define CAP_PI (void *)2 + +static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; +static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; +static DEFINE_SPINLOCK(umh_sysctl_lock); +static DECLARE_RWSEM(umhelper_sem); + +static void call_usermodehelper_freeinfo(struct subprocess_info *info) +{ + if (info->cleanup) + (*info->cleanup)(info); + kfree(info); +} + +static void umh_complete(struct subprocess_info *sub_info) +{ + struct completion *comp = xchg(&sub_info->complete, NULL); + /* + * See call_usermodehelper_exec(). If xchg() returns NULL + * we own sub_info, the UMH_KILLABLE caller has gone away + * or the caller used UMH_NO_WAIT. + */ + if (comp) + complete(comp); + else + call_usermodehelper_freeinfo(sub_info); +} + +/* + * This is the task which runs the usermode application + */ +static int call_usermodehelper_exec_async(void *data) +{ + struct subprocess_info *sub_info = data; + struct cred *new; + int retval; + + spin_lock_irq(¤t->sighand->siglock); + flush_signal_handlers(current, 1); + spin_unlock_irq(¤t->sighand->siglock); + + /* + * Our parent (unbound workqueue) runs with elevated scheduling + * priority. Avoid propagating that into the userspace child. + */ + set_user_nice(current, 0); + + retval = -ENOMEM; + new = prepare_kernel_cred(current); + if (!new) + goto out; + + spin_lock(&umh_sysctl_lock); + new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); + new->cap_inheritable = cap_intersect(usermodehelper_inheritable, + new->cap_inheritable); + spin_unlock(&umh_sysctl_lock); + + if (sub_info->init) { + retval = sub_info->init(sub_info, new); + if (retval) { + abort_creds(new); + goto out; + } + } + + commit_creds(new); + + retval = do_execve(getname_kernel(sub_info->path), + (const char __user *const __user *)sub_info->argv, + (const char __user *const __user *)sub_info->envp); +out: + sub_info->retval = retval; + /* + * call_usermodehelper_exec_sync() will call umh_complete + * if UHM_WAIT_PROC. + */ + if (!(sub_info->wait & UMH_WAIT_PROC)) + umh_complete(sub_info); + if (!retval) + return 0; + do_exit(0); +} + +/* Handles UMH_WAIT_PROC. */ +static void call_usermodehelper_exec_sync(struct subprocess_info *sub_info) +{ + pid_t pid; + + /* If SIGCLD is ignored sys_wait4 won't populate the status. */ + kernel_sigaction(SIGCHLD, SIG_DFL); + pid = kernel_thread(call_usermodehelper_exec_async, sub_info, SIGCHLD); + if (pid < 0) { + sub_info->retval = pid; + } else { + int ret = -ECHILD; + /* + * Normally it is bogus to call wait4() from in-kernel because + * wait4() wants to write the exit code to a userspace address. + * But call_usermodehelper_exec_sync() always runs as kernel + * thread (workqueue) and put_user() to a kernel address works + * OK for kernel threads, due to their having an mm_segment_t + * which spans the entire address space. + * + * Thus the __user pointer cast is valid here. + */ + sys_wait4(pid, (int __user *)&ret, 0, NULL); + + /* + * If ret is 0, either call_usermodehelper_exec_async failed and + * the real error code is already in sub_info->retval or + * sub_info->retval is 0 anyway, so don't mess with it then. + */ + if (ret) + sub_info->retval = ret; + } + + /* Restore default kernel sig handler */ + kernel_sigaction(SIGCHLD, SIG_IGN); + + umh_complete(sub_info); +} + +/* + * We need to create the usermodehelper kernel thread from a task that is affine + * to an optimized set of CPUs (or nohz housekeeping ones) such that they + * inherit a widest affinity irrespective of call_usermodehelper() callers with + * possibly reduced affinity (eg: per-cpu workqueues). We don't want + * usermodehelper targets to contend a busy CPU. + * + * Unbound workqueues provide such wide affinity and allow to block on + * UMH_WAIT_PROC requests without blocking pending request (up to some limit). + * + * Besides, workqueues provide the privilege level that caller might not have + * to perform the usermodehelper request. + * + */ +static void call_usermodehelper_exec_work(struct work_struct *work) +{ + struct subprocess_info *sub_info = + container_of(work, struct subprocess_info, work); + + if (sub_info->wait & UMH_WAIT_PROC) { + call_usermodehelper_exec_sync(sub_info); + } else { + pid_t pid; + /* + * Use CLONE_PARENT to reparent it to kthreadd; we do not + * want to pollute current->children, and we need a parent + * that always ignores SIGCHLD to ensure auto-reaping. + */ + pid = kernel_thread(call_usermodehelper_exec_async, sub_info, + CLONE_PARENT | SIGCHLD); + if (pid < 0) { + sub_info->retval = pid; + umh_complete(sub_info); + } + } +} + +/* + * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY + * (used for preventing user land processes from being created after the user + * land has been frozen during a system-wide hibernation or suspend operation). + * Should always be manipulated under umhelper_sem acquired for write. + */ +static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED; + +/* Number of helpers running */ +static atomic_t running_helpers = ATOMIC_INIT(0); + +/* + * Wait queue head used by usermodehelper_disable() to wait for all running + * helpers to finish. + */ +static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); + +/* + * Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled + * to become 'false'. + */ +static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq); + +/* + * Time to wait for running_helpers to become zero before the setting of + * usermodehelper_disabled in usermodehelper_disable() fails + */ +#define RUNNING_HELPERS_TIMEOUT (5 * HZ) + +int usermodehelper_read_trylock(void) +{ + DEFINE_WAIT(wait); + int ret = 0; + + down_read(&umhelper_sem); + for (;;) { + prepare_to_wait(&usermodehelper_disabled_waitq, &wait, + TASK_INTERRUPTIBLE); + if (!usermodehelper_disabled) + break; + + if (usermodehelper_disabled == UMH_DISABLED) + ret = -EAGAIN; + + up_read(&umhelper_sem); + + if (ret) + break; + + schedule(); + try_to_freeze(); + + down_read(&umhelper_sem); + } + finish_wait(&usermodehelper_disabled_waitq, &wait); + return ret; +} +EXPORT_SYMBOL_GPL(usermodehelper_read_trylock); + +long usermodehelper_read_lock_wait(long timeout) +{ + DEFINE_WAIT(wait); + + if (timeout < 0) + return -EINVAL; + + down_read(&umhelper_sem); + for (;;) { + prepare_to_wait(&usermodehelper_disabled_waitq, &wait, + TASK_UNINTERRUPTIBLE); + if (!usermodehelper_disabled) + break; + + up_read(&umhelper_sem); + + timeout = schedule_timeout(timeout); + if (!timeout) + break; + + down_read(&umhelper_sem); + } + finish_wait(&usermodehelper_disabled_waitq, &wait); + return timeout; +} +EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait); + +void usermodehelper_read_unlock(void) +{ + up_read(&umhelper_sem); +} +EXPORT_SYMBOL_GPL(usermodehelper_read_unlock); + +/** + * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled. + * @depth: New value to assign to usermodehelper_disabled. + * + * Change the value of usermodehelper_disabled (under umhelper_sem locked for + * writing) and wakeup tasks waiting for it to change. + */ +void __usermodehelper_set_disable_depth(enum umh_disable_depth depth) +{ + down_write(&umhelper_sem); + usermodehelper_disabled = depth; + wake_up(&usermodehelper_disabled_waitq); + up_write(&umhelper_sem); +} + +/** + * __usermodehelper_disable - Prevent new helpers from being started. + * @depth: New value to assign to usermodehelper_disabled. + * + * Set usermodehelper_disabled to @depth and wait for running helpers to exit. + */ +int __usermodehelper_disable(enum umh_disable_depth depth) +{ + long retval; + + if (!depth) + return -EINVAL; + + down_write(&umhelper_sem); + usermodehelper_disabled = depth; + up_write(&umhelper_sem); + + /* + * From now on call_usermodehelper_exec() won't start any new + * helpers, so it is sufficient if running_helpers turns out to + * be zero at one point (it may be increased later, but that + * doesn't matter). + */ + retval = wait_event_timeout(running_helpers_waitq, + atomic_read(&running_helpers) == 0, + RUNNING_HELPERS_TIMEOUT); + if (retval) + return 0; + + __usermodehelper_set_disable_depth(UMH_ENABLED); + return -EAGAIN; +} + +static void helper_lock(void) +{ + atomic_inc(&running_helpers); + smp_mb__after_atomic(); +} + +static void helper_unlock(void) +{ + if (atomic_dec_and_test(&running_helpers)) + wake_up(&running_helpers_waitq); +} + +/** + * call_usermodehelper_setup - prepare to call a usermode helper + * @path: path to usermode executable + * @argv: arg vector for process + * @envp: environment for process + * @gfp_mask: gfp mask for memory allocation + * @cleanup: a cleanup function + * @init: an init function + * @data: arbitrary context sensitive data + * + * Returns either %NULL on allocation failure, or a subprocess_info + * structure. This should be passed to call_usermodehelper_exec to + * exec the process and free the structure. + * + * The init function is used to customize the helper process prior to + * exec. A non-zero return code causes the process to error out, exit, + * and return the failure to the calling process + * + * The cleanup function is just before ethe subprocess_info is about to + * be freed. This can be used for freeing the argv and envp. The + * Function must be runnable in either a process context or the + * context in which call_usermodehelper_exec is called. + */ +struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, + char **envp, gfp_t gfp_mask, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *info), + void *data) +{ + struct subprocess_info *sub_info; + sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask); + if (!sub_info) + goto out; + + INIT_WORK(&sub_info->work, call_usermodehelper_exec_work); + +#ifdef CONFIG_STATIC_USERMODEHELPER + sub_info->path = CONFIG_STATIC_USERMODEHELPER_PATH; +#else + sub_info->path = path; +#endif + sub_info->argv = argv; + sub_info->envp = envp; + + sub_info->cleanup = cleanup; + sub_info->init = init; + sub_info->data = data; + out: + return sub_info; +} +EXPORT_SYMBOL(call_usermodehelper_setup); + +/** + * call_usermodehelper_exec - start a usermode application + * @sub_info: information about the subprocessa + * @wait: wait for the application to finish and return status. + * when UMH_NO_WAIT don't wait at all, but you get no useful error back + * when the program couldn't be exec'ed. This makes it safe to call + * from interrupt context. + * + * Runs a user-space application. The application is started + * asynchronously if wait is not set, and runs as a child of system workqueues. + * (ie. it runs with full root capabilities and optimized affinity). + */ +int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) +{ + DECLARE_COMPLETION_ONSTACK(done); + int retval = 0; + + if (!sub_info->path) { + call_usermodehelper_freeinfo(sub_info); + return -EINVAL; + } + helper_lock(); + if (usermodehelper_disabled) { + retval = -EBUSY; + goto out; + } + + /* + * If there is no binary for us to call, then just return and get out of + * here. This allows us to set STATIC_USERMODEHELPER_PATH to "" and + * disable all call_usermodehelper() calls. + */ + if (strlen(sub_info->path) == 0) + goto out; + + /* + * Set the completion pointer only if there is a waiter. + * This makes it possible to use umh_complete to free + * the data structure in case of UMH_NO_WAIT. + */ + sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done; + sub_info->wait = wait; + + queue_work(system_unbound_wq, &sub_info->work); + if (wait == UMH_NO_WAIT) /* task has freed sub_info */ + goto unlock; + + if (wait & UMH_KILLABLE) { + retval = wait_for_completion_killable(&done); + if (!retval) + goto wait_done; + + /* umh_complete() will see NULL and free sub_info */ + if (xchg(&sub_info->complete, NULL)) + goto unlock; + /* fallthrough, umh_complete() was already called */ + } + + wait_for_completion(&done); +wait_done: + retval = sub_info->retval; +out: + call_usermodehelper_freeinfo(sub_info); +unlock: + helper_unlock(); + return retval; +} +EXPORT_SYMBOL(call_usermodehelper_exec); + +/** + * call_usermodehelper() - prepare and start a usermode application + * @path: path to usermode executable + * @argv: arg vector for process + * @envp: environment for process + * @wait: wait for the application to finish and return status. + * when UMH_NO_WAIT don't wait at all, but you get no useful error back + * when the program couldn't be exec'ed. This makes it safe to call + * from interrupt context. + * + * This function is the equivalent to use call_usermodehelper_setup() and + * call_usermodehelper_exec(). + */ +int call_usermodehelper(const char *path, char **argv, char **envp, int wait) +{ + struct subprocess_info *info; + gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; + + info = call_usermodehelper_setup(path, argv, envp, gfp_mask, + NULL, NULL, NULL); + if (info == NULL) + return -ENOMEM; + + return call_usermodehelper_exec(info, wait); +} +EXPORT_SYMBOL(call_usermodehelper); + +static int proc_cap_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; + kernel_cap_t new_cap; + int err, i; + + if (write && (!capable(CAP_SETPCAP) || + !capable(CAP_SYS_MODULE))) + return -EPERM; + + /* + * convert from the global kernel_cap_t to the ulong array to print to + * userspace if this is a read. + */ + spin_lock(&umh_sysctl_lock); + for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) { + if (table->data == CAP_BSET) + cap_array[i] = usermodehelper_bset.cap[i]; + else if (table->data == CAP_PI) + cap_array[i] = usermodehelper_inheritable.cap[i]; + else + BUG(); + } + spin_unlock(&umh_sysctl_lock); + + t = *table; + t.data = &cap_array; + + /* + * actually read or write and array of ulongs from userspace. Remember + * these are least significant 32 bits first + */ + err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + + /* + * convert from the sysctl array of ulongs to the kernel_cap_t + * internal representation + */ + for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) + new_cap.cap[i] = cap_array[i]; + + /* + * Drop everything not in the new_cap (but don't add things) + */ + spin_lock(&umh_sysctl_lock); + if (write) { + if (table->data == CAP_BSET) + usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); + if (table->data == CAP_PI) + usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); + } + spin_unlock(&umh_sysctl_lock); + + return 0; +} + +struct ctl_table usermodehelper_table[] = { + { + .procname = "bset", + .data = CAP_BSET, + .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .mode = 0600, + .proc_handler = proc_cap_handler, + }, + { + .procname = "inheritable", + .data = CAP_PI, + .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .mode = 0600, + .proc_handler = proc_cap_handler, + }, + { } +}; diff --git a/lib/Kconfig b/lib/Kconfig index 6762529ad9e4597f473a07e9300bd0ac8a601c10..40b114a11d7cea28a6b93521fa58ad958d8ddbe4 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -575,4 +575,7 @@ config PARMAN config PRIME_NUMBERS tristate +config STRING_SELFTEST + bool "Test string functions" + endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7396f5044397e26ab9ebebc031c797d63c24533c..b19c491cbc4e7934ab5bc78549c95423b91b2f7a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1930,6 +1930,17 @@ config TEST_KMOD If unsure, say N. +config TEST_DEBUG_VIRTUAL + tristate "Test CONFIG_DEBUG_VIRTUAL feature" + depends on DEBUG_VIRTUAL + help + Test the kernel's ability to detect incorrect calls to + virt_to_phys() done against the non-linear part of the + kernel's virtual address map. + + If unsure, say N. + + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index 40c18372b301e5fe429fff598a077996e353966f..469ce5e24e4f9bb6444d4b7164ea22c8f6d441e7 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o +obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/bitmap.c b/lib/bitmap.c index 9a532805364b6c039c30467a72a557ba8d532ca0..c82c61b66e166b521ff91d4e6055905d7431e6dc 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -513,7 +513,7 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, int nmaskbits) { unsigned int a, b, old_a, old_b; - unsigned int group_size, used_size; + unsigned int group_size, used_size, off; int c, old_c, totaldigits, ndigits; const char __user __force *ubuf = (const char __user __force *)buf; int at_start, in_range, in_partial_range; @@ -599,6 +599,8 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, a = old_a; b = old_b; old_a = old_b = 0; + } else { + used_size = group_size = b - a + 1; } /* if no digit is after '-', it's wrong*/ if (at_start && in_range) @@ -608,17 +610,9 @@ static int __bitmap_parselist(const char *buf, unsigned int buflen, if (b >= nmaskbits) return -ERANGE; while (a <= b) { - if (in_partial_range) { - static int pos_in_group = 1; - - if (pos_in_group <= used_size) - set_bit(a, maskp); - - if (a == b || ++pos_in_group > group_size) - pos_in_group = 1; - } else - set_bit(a, maskp); - a++; + off = min(b - a + 1, used_size); + bitmap_set(maskp, a, off); + a += group_size; } } while (buflen && c == ','); return 0; diff --git a/lib/cmdline.c b/lib/cmdline.c index 4c0888c4a68d9621717f9012d3ca4cc0b72df20b..171c19b6888e207bd663e9e41a87c7b0fad9a1de 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -244,5 +244,4 @@ char *next_arg(char *args, char **param, char **val) /* Chew up trailing spaces. */ return skip_spaces(next); - //return next; } diff --git a/lib/cpumask.c b/lib/cpumask.c index 4731a0895760e70cab07d59a0cff45c2590da450..8b1a1bd77539386a580dc79347518c76beb97e1d 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -5,6 +5,22 @@ #include #include +/** + * cpumask_next - get the next cpu in a cpumask + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set. + */ +unsigned int cpumask_next(int n, const struct cpumask *srcp) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1); +} +EXPORT_SYMBOL(cpumask_next); + /** * cpumask_next_and - get the next cpu in *src1p & *src2p * @n: the cpu prior to the place to search (ie. return will be > @n) diff --git a/lib/hexdump.c b/lib/hexdump.c index 992457b1284c186d8e0d7b8c5d7b5fc349c18ef3..81b70ed37209697c083247264c9374b143c1749f 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -42,7 +43,7 @@ EXPORT_SYMBOL(hex_to_bin); * @src: ascii hexadecimal string * @count: result length * - * Return 0 on success, -1 in case of bad input. + * Return 0 on success, -EINVAL in case of bad input. */ int hex2bin(u8 *dst, const char *src, size_t count) { @@ -51,7 +52,7 @@ int hex2bin(u8 *dst, const char *src, size_t count) int lo = hex_to_bin(*src++); if ((hi < 0) || (lo < 0)) - return -1; + return -EINVAL; *dst++ = (hi << 4) | lo; } diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index df495fe814210c1c979b893f8723e9b66fa4498f..0e343fd29570194d775594fb8e5319682c8c3d5c 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -19,14 +19,14 @@ __param(bool, search_all, false, "Searches will iterate all nodes in the tree"); __param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint"); -static struct rb_root root = RB_ROOT; +static struct rb_root_cached root = RB_ROOT_CACHED; static struct interval_tree_node *nodes = NULL; static u32 *queries = NULL; static struct rnd_state rnd; static inline unsigned long -search(struct rb_root *root, unsigned long start, unsigned long last) +search(struct rb_root_cached *root, unsigned long start, unsigned long last) { struct interval_tree_node *node; unsigned long results = 0; diff --git a/lib/oid_registry.c b/lib/oid_registry.c index 318f382a010d62f0f6b0c536ee038f41100e57cf..41b9e50711a72af31d7548d3aea5be1177a45a9d 100644 --- a/lib/oid_registry.c +++ b/lib/oid_registry.c @@ -142,9 +142,9 @@ int sprint_oid(const void *data, size_t datasize, char *buffer, size_t bufsize) } ret += count = snprintf(buffer, bufsize, ".%lu", num); buffer += count; - bufsize -= count; - if (bufsize == 0) + if (bufsize <= count) return -ENOBUFS; + bufsize -= count; } return ret; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9717e2a503746c0e5a01b2e570bcd3d74d31c837..8b1feca1230a08c4537036f517cfa4369b62dffa 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -463,7 +463,7 @@ radix_tree_node_free(struct radix_tree_node *node) * To make use of this facility, the radix tree must be initialised without * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE(). */ -static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) +static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) { struct radix_tree_preload *rtp; struct radix_tree_node *node; @@ -2104,7 +2104,8 @@ EXPORT_SYMBOL(radix_tree_tagged); */ void idr_preload(gfp_t gfp_mask) { - __radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE); + if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) + preempt_disable(); } EXPORT_SYMBOL(idr_preload); @@ -2118,13 +2119,13 @@ EXPORT_SYMBOL(idr_preload); */ int ida_pre_get(struct ida *ida, gfp_t gfp) { - __radix_tree_preload(gfp, IDA_PRELOAD_SIZE); /* * The IDA API has no preload_end() equivalent. Instead, * ida_get_new() can return -EAGAIN, prompting the caller * to return to the ida_pre_get() step. */ - preempt_enable(); + if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE)) + preempt_enable(); if (!this_cpu_read(ida_bitmap)) { struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); diff --git a/lib/rbtree.c b/lib/rbtree.c index 4ba2828a67c0401b371b04cce7c52d566b5c0e74..ba4a9d165f1bed3c39651387def0008fc793fbd6 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -95,22 +95,35 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, static __always_inline void __rb_insert(struct rb_node *node, struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + if (newleft) + *leftmost = node; + while (true) { /* - * Loop invariant: node is red - * - * If there is a black parent, we are done. - * Otherwise, take some corrective action as we don't - * want a red root or two consecutive red nodes. + * Loop invariant: node is red. */ - if (!parent) { + if (unlikely(!parent)) { + /* + * The inserted node is root. Either this is the + * first node, or we recursed at Case 1 below and + * are no longer violating 4). + */ rb_set_parent_color(node, NULL, RB_BLACK); break; - } else if (rb_is_black(parent)) + } + + /* + * If there is a black parent, we are done. + * Otherwise, take some corrective action as, + * per 4), we don't want a red root or two + * consecutive red nodes. + */ + if(rb_is_black(parent)) break; gparent = rb_red_parent(parent); @@ -119,7 +132,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root, if (parent != tmp) { /* parent == gparent->rb_left */ if (tmp && rb_is_red(tmp)) { /* - * Case 1 - color flips + * Case 1 - node's uncle is red (color flips). * * G g * / \ / \ @@ -142,7 +155,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, tmp = parent->rb_right; if (node == tmp) { /* - * Case 2 - left rotate at parent + * Case 2 - node's uncle is black and node is + * the parent's right child (left rotate at parent). * * G G * / \ / \ @@ -166,7 +180,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } /* - * Case 3 - right rotate at gparent + * Case 3 - node's uncle is black and node is + * the parent's left child (right rotate at gparent). * * G P * / \ / \ @@ -434,19 +449,38 @@ static const struct rb_augment_callbacks dummy_callbacks = { void rb_insert_color(struct rb_node *node, struct rb_root *root) { - __rb_insert(node, root, dummy_rotate); + __rb_insert(node, root, false, NULL, dummy_rotate); } EXPORT_SYMBOL(rb_insert_color); void rb_erase(struct rb_node *node, struct rb_root *root) { struct rb_node *rebalance; - rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + rebalance = __rb_erase_augmented(node, root, + NULL, &dummy_callbacks); if (rebalance) ____rb_erase_color(rebalance, root, dummy_rotate); } EXPORT_SYMBOL(rb_erase); +void rb_insert_color_cached(struct rb_node *node, + struct rb_root_cached *root, bool leftmost) +{ + __rb_insert(node, &root->rb_root, leftmost, + &root->rb_leftmost, dummy_rotate); +} +EXPORT_SYMBOL(rb_insert_color_cached); + +void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, &root->rb_root, + &root->rb_leftmost, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, &root->rb_root, dummy_rotate); +} +EXPORT_SYMBOL(rb_erase_cached); + /* * Augmented rbtree manipulation functions. * @@ -455,9 +489,10 @@ EXPORT_SYMBOL(rb_erase); */ void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { - __rb_insert(node, root, augment_rotate); + __rb_insert(node, root, newleft, leftmost, augment_rotate); } EXPORT_SYMBOL(__rb_insert_augmented); @@ -502,7 +537,7 @@ struct rb_node *rb_next(const struct rb_node *node) * as we can. */ if (node->rb_right) { - node = node->rb_right; + node = node->rb_right; while (node->rb_left) node=node->rb_left; return (struct rb_node *)node; @@ -534,7 +569,7 @@ struct rb_node *rb_prev(const struct rb_node *node) * as we can. */ if (node->rb_left) { - node = node->rb_left; + node = node->rb_left; while (node->rb_right) node=node->rb_right; return (struct rb_node *)node; diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 8b3c9dc882628fd9945b5901eba670f67f885adf..191a238e5a9d73e67ee763371cf3f9a4a3865792 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -1,11 +1,18 @@ #include +#include #include #include +#include #include -#define NODES 100 -#define PERF_LOOPS 100000 -#define CHECK_LOOPS 100 +#define __param(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg); + +__param(int, nnodes, 100, "Number of nodes in the rb-tree"); +__param(int, perf_loops, 100000, "Number of iterations modifying the rb-tree"); +__param(int, check_loops, 100, "Number of iterations modifying and verifying the rb-tree"); struct test_node { u32 key; @@ -16,14 +23,14 @@ struct test_node { u32 augmented; }; -static struct rb_root root = RB_ROOT; -static struct test_node nodes[NODES]; +static struct rb_root_cached root = RB_ROOT_CACHED; +static struct test_node *nodes = NULL; static struct rnd_state rnd; -static void insert(struct test_node *node, struct rb_root *root) +static void insert(struct test_node *node, struct rb_root_cached *root) { - struct rb_node **new = &root->rb_node, *parent = NULL; + struct rb_node **new = &root->rb_root.rb_node, *parent = NULL; u32 key = node->key; while (*new) { @@ -35,14 +42,40 @@ static void insert(struct test_node *node, struct rb_root *root) } rb_link_node(&node->rb, parent, new); - rb_insert_color(&node->rb, root); + rb_insert_color(&node->rb, &root->rb_root); +} + +static void insert_cached(struct test_node *node, struct rb_root_cached *root) +{ + struct rb_node **new = &root->rb_root.rb_node, *parent = NULL; + u32 key = node->key; + bool leftmost = true; + + while (*new) { + parent = *new; + if (key < rb_entry(parent, struct test_node, rb)->key) + new = &parent->rb_left; + else { + new = &parent->rb_right; + leftmost = false; + } + } + + rb_link_node(&node->rb, parent, new); + rb_insert_color_cached(&node->rb, root, leftmost); } -static inline void erase(struct test_node *node, struct rb_root *root) +static inline void erase(struct test_node *node, struct rb_root_cached *root) { - rb_erase(&node->rb, root); + rb_erase(&node->rb, &root->rb_root); } +static inline void erase_cached(struct test_node *node, struct rb_root_cached *root) +{ + rb_erase_cached(&node->rb, root); +} + + static inline u32 augment_recompute(struct test_node *node) { u32 max = node->val, child_augmented; @@ -64,9 +97,10 @@ static inline u32 augment_recompute(struct test_node *node) RB_DECLARE_CALLBACKS(static, augment_callbacks, struct test_node, rb, u32, augmented, augment_recompute) -static void insert_augmented(struct test_node *node, struct rb_root *root) +static void insert_augmented(struct test_node *node, + struct rb_root_cached *root) { - struct rb_node **new = &root->rb_node, *rb_parent = NULL; + struct rb_node **new = &root->rb_root.rb_node, *rb_parent = NULL; u32 key = node->key; u32 val = node->val; struct test_node *parent; @@ -84,18 +118,53 @@ static void insert_augmented(struct test_node *node, struct rb_root *root) node->augmented = val; rb_link_node(&node->rb, rb_parent, new); - rb_insert_augmented(&node->rb, root, &augment_callbacks); + rb_insert_augmented(&node->rb, &root->rb_root, &augment_callbacks); +} + +static void insert_augmented_cached(struct test_node *node, + struct rb_root_cached *root) +{ + struct rb_node **new = &root->rb_root.rb_node, *rb_parent = NULL; + u32 key = node->key; + u32 val = node->val; + struct test_node *parent; + bool leftmost = true; + + while (*new) { + rb_parent = *new; + parent = rb_entry(rb_parent, struct test_node, rb); + if (parent->augmented < val) + parent->augmented = val; + if (key < parent->key) + new = &parent->rb.rb_left; + else { + new = &parent->rb.rb_right; + leftmost = false; + } + } + + node->augmented = val; + rb_link_node(&node->rb, rb_parent, new); + rb_insert_augmented_cached(&node->rb, root, + leftmost, &augment_callbacks); +} + + +static void erase_augmented(struct test_node *node, struct rb_root_cached *root) +{ + rb_erase_augmented(&node->rb, &root->rb_root, &augment_callbacks); } -static void erase_augmented(struct test_node *node, struct rb_root *root) +static void erase_augmented_cached(struct test_node *node, + struct rb_root_cached *root) { - rb_erase_augmented(&node->rb, root, &augment_callbacks); + rb_erase_augmented_cached(&node->rb, root, &augment_callbacks); } static void init(void) { int i; - for (i = 0; i < NODES; i++) { + for (i = 0; i < nnodes; i++) { nodes[i].key = prandom_u32_state(&rnd); nodes[i].val = prandom_u32_state(&rnd); } @@ -118,7 +187,7 @@ static void check_postorder_foreach(int nr_nodes) { struct test_node *cur, *n; int count = 0; - rbtree_postorder_for_each_entry_safe(cur, n, &root, rb) + rbtree_postorder_for_each_entry_safe(cur, n, &root.rb_root, rb) count++; WARN_ON_ONCE(count != nr_nodes); @@ -128,7 +197,7 @@ static void check_postorder(int nr_nodes) { struct rb_node *rb; int count = 0; - for (rb = rb_first_postorder(&root); rb; rb = rb_next_postorder(rb)) + for (rb = rb_first_postorder(&root.rb_root); rb; rb = rb_next_postorder(rb)) count++; WARN_ON_ONCE(count != nr_nodes); @@ -140,7 +209,7 @@ static void check(int nr_nodes) int count = 0, blacks = 0; u32 prev_key = 0; - for (rb = rb_first(&root); rb; rb = rb_next(rb)) { + for (rb = rb_first(&root.rb_root); rb; rb = rb_next(rb)) { struct test_node *node = rb_entry(rb, struct test_node, rb); WARN_ON_ONCE(node->key < prev_key); WARN_ON_ONCE(is_red(rb) && @@ -155,7 +224,7 @@ static void check(int nr_nodes) } WARN_ON_ONCE(count != nr_nodes); - WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); + WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root.rb_root))) - 1); check_postorder(nr_nodes); check_postorder_foreach(nr_nodes); @@ -166,7 +235,7 @@ static void check_augmented(int nr_nodes) struct rb_node *rb; check(nr_nodes); - for (rb = rb_first(&root); rb; rb = rb_next(rb)) { + for (rb = rb_first(&root.rb_root); rb; rb = rb_next(rb)) { struct test_node *node = rb_entry(rb, struct test_node, rb); WARN_ON_ONCE(node->augmented != augment_recompute(node)); } @@ -176,6 +245,11 @@ static int __init rbtree_test_init(void) { int i, j; cycles_t time1, time2, time; + struct rb_node *node; + + nodes = kmalloc(nnodes * sizeof(*nodes), GFP_KERNEL); + if (!nodes) + return -ENOMEM; printk(KERN_ALERT "rbtree testing"); @@ -184,27 +258,88 @@ static int __init rbtree_test_init(void) time1 = get_cycles(); - for (i = 0; i < PERF_LOOPS; i++) { - for (j = 0; j < NODES; j++) + for (i = 0; i < perf_loops; i++) { + for (j = 0; j < nnodes; j++) insert(nodes + j, &root); - for (j = 0; j < NODES; j++) + for (j = 0; j < nnodes; j++) erase(nodes + j, &root); } time2 = get_cycles(); time = time2 - time1; - time = div_u64(time, PERF_LOOPS); - printk(" -> %llu cycles\n", (unsigned long long)time); + time = div_u64(time, perf_loops); + printk(" -> test 1 (latency of nnodes insert+delete): %llu cycles\n", + (unsigned long long)time); + + time1 = get_cycles(); + + for (i = 0; i < perf_loops; i++) { + for (j = 0; j < nnodes; j++) + insert_cached(nodes + j, &root); + for (j = 0; j < nnodes; j++) + erase_cached(nodes + j, &root); + } + + time2 = get_cycles(); + time = time2 - time1; + + time = div_u64(time, perf_loops); + printk(" -> test 2 (latency of nnodes cached insert+delete): %llu cycles\n", + (unsigned long long)time); + + for (i = 0; i < nnodes; i++) + insert(nodes + i, &root); + + time1 = get_cycles(); + + for (i = 0; i < perf_loops; i++) { + for (node = rb_first(&root.rb_root); node; node = rb_next(node)) + ; + } + + time2 = get_cycles(); + time = time2 - time1; + + time = div_u64(time, perf_loops); + printk(" -> test 3 (latency of inorder traversal): %llu cycles\n", + (unsigned long long)time); + + time1 = get_cycles(); + + for (i = 0; i < perf_loops; i++) + node = rb_first(&root.rb_root); + + time2 = get_cycles(); + time = time2 - time1; + + time = div_u64(time, perf_loops); + printk(" -> test 4 (latency to fetch first node)\n"); + printk(" non-cached: %llu cycles\n", (unsigned long long)time); + + time1 = get_cycles(); + + for (i = 0; i < perf_loops; i++) + node = rb_first_cached(&root); + + time2 = get_cycles(); + time = time2 - time1; + + time = div_u64(time, perf_loops); + printk(" cached: %llu cycles\n", (unsigned long long)time); - for (i = 0; i < CHECK_LOOPS; i++) { + for (i = 0; i < nnodes; i++) + erase(nodes + i, &root); + + /* run checks */ + for (i = 0; i < check_loops; i++) { init(); - for (j = 0; j < NODES; j++) { + for (j = 0; j < nnodes; j++) { check(j); insert(nodes + j, &root); } - for (j = 0; j < NODES; j++) { - check(NODES - j); + for (j = 0; j < nnodes; j++) { + check(nnodes - j); erase(nodes + j, &root); } check(0); @@ -216,32 +351,49 @@ static int __init rbtree_test_init(void) time1 = get_cycles(); - for (i = 0; i < PERF_LOOPS; i++) { - for (j = 0; j < NODES; j++) + for (i = 0; i < perf_loops; i++) { + for (j = 0; j < nnodes; j++) insert_augmented(nodes + j, &root); - for (j = 0; j < NODES; j++) + for (j = 0; j < nnodes; j++) erase_augmented(nodes + j, &root); } time2 = get_cycles(); time = time2 - time1; - time = div_u64(time, PERF_LOOPS); - printk(" -> %llu cycles\n", (unsigned long long)time); + time = div_u64(time, perf_loops); + printk(" -> test 1 (latency of nnodes insert+delete): %llu cycles\n", (unsigned long long)time); + + time1 = get_cycles(); + + for (i = 0; i < perf_loops; i++) { + for (j = 0; j < nnodes; j++) + insert_augmented_cached(nodes + j, &root); + for (j = 0; j < nnodes; j++) + erase_augmented_cached(nodes + j, &root); + } + + time2 = get_cycles(); + time = time2 - time1; + + time = div_u64(time, perf_loops); + printk(" -> test 2 (latency of nnodes cached insert+delete): %llu cycles\n", (unsigned long long)time); - for (i = 0; i < CHECK_LOOPS; i++) { + for (i = 0; i < check_loops; i++) { init(); - for (j = 0; j < NODES; j++) { + for (j = 0; j < nnodes; j++) { check_augmented(j); insert_augmented(nodes + j, &root); } - for (j = 0; j < NODES; j++) { - check_augmented(NODES - j); + for (j = 0; j < nnodes; j++) { + check_augmented(nnodes - j); erase_augmented(nodes + j, &root); } check_augmented(0); } + kfree(nodes); + return -EAGAIN; /* Fail will directly unload the module */ } diff --git a/lib/string.c b/lib/string.c index ebbb99c775bdab12e6036a85bed06d7521cf94b4..9921dc202db440a0a1b20aa19b85555e634fe3b5 100644 --- a/lib/string.c +++ b/lib/string.c @@ -723,6 +723,72 @@ void memzero_explicit(void *s, size_t count) } EXPORT_SYMBOL(memzero_explicit); +#ifndef __HAVE_ARCH_MEMSET16 +/** + * memset16() - Fill a memory area with a uint16_t + * @s: Pointer to the start of the area. + * @v: The value to fill the area with + * @count: The number of values to store + * + * Differs from memset() in that it fills with a uint16_t instead + * of a byte. Remember that @count is the number of uint16_ts to + * store, not the number of bytes. + */ +void *memset16(uint16_t *s, uint16_t v, size_t count) +{ + uint16_t *xs = s; + + while (count--) + *xs++ = v; + return s; +} +EXPORT_SYMBOL(memset16); +#endif + +#ifndef __HAVE_ARCH_MEMSET32 +/** + * memset32() - Fill a memory area with a uint32_t + * @s: Pointer to the start of the area. + * @v: The value to fill the area with + * @count: The number of values to store + * + * Differs from memset() in that it fills with a uint32_t instead + * of a byte. Remember that @count is the number of uint32_ts to + * store, not the number of bytes. + */ +void *memset32(uint32_t *s, uint32_t v, size_t count) +{ + uint32_t *xs = s; + + while (count--) + *xs++ = v; + return s; +} +EXPORT_SYMBOL(memset32); +#endif + +#ifndef __HAVE_ARCH_MEMSET64 +/** + * memset64() - Fill a memory area with a uint64_t + * @s: Pointer to the start of the area. + * @v: The value to fill the area with + * @count: The number of values to store + * + * Differs from memset() in that it fills with a uint64_t instead + * of a byte. Remember that @count is the number of uint64_ts to + * store, not the number of bytes. + */ +void *memset64(uint64_t *s, uint64_t v, size_t count) +{ + uint64_t *xs = s; + + while (count--) + *xs++ = v; + return s; +} +EXPORT_SYMBOL(memset64); +#endif + #ifndef __HAVE_ARCH_MEMCPY /** * memcpy - Copy one area of memory to another @@ -985,3 +1051,144 @@ void fortify_panic(const char *name) BUG(); } EXPORT_SYMBOL(fortify_panic); + +#ifdef CONFIG_STRING_SELFTEST +#include +#include + +static __init int memset16_selftest(void) +{ + unsigned i, j, k; + u16 v, *p; + + p = kmalloc(256 * 2 * 2, GFP_KERNEL); + if (!p) + return -1; + + for (i = 0; i < 256; i++) { + for (j = 0; j < 256; j++) { + memset(p, 0xa1, 256 * 2 * sizeof(v)); + memset16(p + i, 0xb1b2, j); + for (k = 0; k < 512; k++) { + v = p[k]; + if (k < i) { + if (v != 0xa1a1) + goto fail; + } else if (k < i + j) { + if (v != 0xb1b2) + goto fail; + } else { + if (v != 0xa1a1) + goto fail; + } + } + } + } + +fail: + kfree(p); + if (i < 256) + return (i << 24) | (j << 16) | k; + return 0; +} + +static __init int memset32_selftest(void) +{ + unsigned i, j, k; + u32 v, *p; + + p = kmalloc(256 * 2 * 4, GFP_KERNEL); + if (!p) + return -1; + + for (i = 0; i < 256; i++) { + for (j = 0; j < 256; j++) { + memset(p, 0xa1, 256 * 2 * sizeof(v)); + memset32(p + i, 0xb1b2b3b4, j); + for (k = 0; k < 512; k++) { + v = p[k]; + if (k < i) { + if (v != 0xa1a1a1a1) + goto fail; + } else if (k < i + j) { + if (v != 0xb1b2b3b4) + goto fail; + } else { + if (v != 0xa1a1a1a1) + goto fail; + } + } + } + } + +fail: + kfree(p); + if (i < 256) + return (i << 24) | (j << 16) | k; + return 0; +} + +static __init int memset64_selftest(void) +{ + unsigned i, j, k; + u64 v, *p; + + p = kmalloc(256 * 2 * 8, GFP_KERNEL); + if (!p) + return -1; + + for (i = 0; i < 256; i++) { + for (j = 0; j < 256; j++) { + memset(p, 0xa1, 256 * 2 * sizeof(v)); + memset64(p + i, 0xb1b2b3b4b5b6b7b8ULL, j); + for (k = 0; k < 512; k++) { + v = p[k]; + if (k < i) { + if (v != 0xa1a1a1a1a1a1a1a1ULL) + goto fail; + } else if (k < i + j) { + if (v != 0xb1b2b3b4b5b6b7b8ULL) + goto fail; + } else { + if (v != 0xa1a1a1a1a1a1a1a1ULL) + goto fail; + } + } + } + } + +fail: + kfree(p); + if (i < 256) + return (i << 24) | (j << 16) | k; + return 0; +} + +static __init int string_selftest_init(void) +{ + int test, subtest; + + test = 1; + subtest = memset16_selftest(); + if (subtest) + goto fail; + + test = 2; + subtest = memset32_selftest(); + if (subtest) + goto fail; + + test = 3; + subtest = memset64_selftest(); + if (subtest) + goto fail; + + pr_info("String selftests succeeded\n"); + return 0; +fail: + pr_crit("String selftest failure %d.%08x\n", test, subtest); + return 0; +} + +module_init(string_selftest_init); +#endif /* CONFIG_STRING_SELFTEST */ diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index 2526a2975c51bc24e329bdd542b3dd2b0ebfbdec..599c6713f2a2aaa8157a230f559ab4a006bbcd67 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -165,6 +165,96 @@ static void __init test_zero_fill_copy(void) expect_eq_pbl("128-1023", bmap2, 1024); } +#define PARSE_TIME 0x1 + +struct test_bitmap_parselist{ + const int errno; + const char *in; + const unsigned long *expected; + const int nbits; + const int flags; +}; + +static const unsigned long exp[] __initconst = { + BITMAP_FROM_U64(1), + BITMAP_FROM_U64(2), + BITMAP_FROM_U64(0x0000ffff), + BITMAP_FROM_U64(0xffff0000), + BITMAP_FROM_U64(0x55555555), + BITMAP_FROM_U64(0xaaaaaaaa), + BITMAP_FROM_U64(0x11111111), + BITMAP_FROM_U64(0x22222222), + BITMAP_FROM_U64(0xffffffff), + BITMAP_FROM_U64(0xfffffffe), + BITMAP_FROM_U64(0x3333333311111111), + BITMAP_FROM_U64(0xffffffff77777777) +}; + +static const unsigned long exp2[] __initconst = { + BITMAP_FROM_U64(0x3333333311111111), + BITMAP_FROM_U64(0xffffffff77777777) +}; + +static const struct test_bitmap_parselist parselist_tests[] __initconst = { +#define step (sizeof(u64) / sizeof(unsigned long)) + + {0, "0", &exp[0], 8, 0}, + {0, "1", &exp[1 * step], 8, 0}, + {0, "0-15", &exp[2 * step], 32, 0}, + {0, "16-31", &exp[3 * step], 32, 0}, + {0, "0-31:1/2", &exp[4 * step], 32, 0}, + {0, "1-31:1/2", &exp[5 * step], 32, 0}, + {0, "0-31:1/4", &exp[6 * step], 32, 0}, + {0, "1-31:1/4", &exp[7 * step], 32, 0}, + {0, "0-31:4/4", &exp[8 * step], 32, 0}, + {0, "1-31:4/4", &exp[9 * step], 32, 0}, + {0, "0-31:1/4,32-63:2/4", &exp[10 * step], 64, 0}, + {0, "0-31:3/4,32-63:4/4", &exp[11 * step], 64, 0}, + + {0, "0-31:1/4,32-63:2/4,64-95:3/4,96-127:4/4", exp2, 128, 0}, + + {0, "0-2047:128/256", NULL, 2048, PARSE_TIME}, + + {-EINVAL, "-1", NULL, 8, 0}, + {-EINVAL, "-0", NULL, 8, 0}, + {-EINVAL, "10-1", NULL, 8, 0}, + {-EINVAL, "0-31:10/1", NULL, 8, 0}, +}; + +static void __init test_bitmap_parselist(void) +{ + int i; + int err; + cycles_t cycles; + DECLARE_BITMAP(bmap, 2048); + + for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) { +#define ptest parselist_tests[i] + + cycles = get_cycles(); + err = bitmap_parselist(ptest.in, bmap, ptest.nbits); + cycles = get_cycles() - cycles; + + if (err != ptest.errno) { + pr_err("test %d: input is %s, errno is %d, expected %d\n", + i, ptest.in, err, ptest.errno); + continue; + } + + if (!err && ptest.expected + && !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) { + pr_err("test %d: input is %s, result is 0x%lx, expected 0x%lx\n", + i, ptest.in, bmap[0], *ptest.expected); + continue; + } + + if (ptest.flags & PARSE_TIME) + pr_err("test %d: input is '%s' OK, Time: %llu\n", + i, ptest.in, + (unsigned long long)cycles); + } +} + static void __init test_bitmap_u32_array_conversions(void) { DECLARE_BITMAP(bmap1, 1024); @@ -365,6 +455,7 @@ static int __init test_bitmap_init(void) { test_zero_fill_copy(); test_bitmap_u32_array_conversions(); + test_bitmap_parselist(); test_mem_optimisations(); if (failed_tests == 0) diff --git a/lib/test_debug_virtual.c b/lib/test_debug_virtual.c new file mode 100644 index 0000000000000000000000000000000000000000..b9cdeecc19dc303f0167433457f41f5325684f6a --- /dev/null +++ b/lib/test_debug_virtual.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#ifdef CONFIG_MIPS +#include +#endif + +struct foo { + unsigned int bar; +}; + +struct foo *foo; + +static int __init test_debug_virtual_init(void) +{ + phys_addr_t pa; + void *va; + + va = (void *)VMALLOC_START; + pa = virt_to_phys(va); + + pr_info("PA: %pa for VA: 0x%lx\n", &pa, (unsigned long)va); + + foo = kzalloc(sizeof(*foo), GFP_KERNEL); + if (!foo) + return -ENOMEM; + + pa = virt_to_phys(foo); + va = foo; + pr_info("PA: %pa for VA: 0x%lx\n", &pa, (unsigned long)va); + + return 0; +} +module_init(test_debug_virtual_init); + +static void __exit test_debug_virtual_exit(void) +{ + kfree(foo); +} +module_exit(test_debug_virtual_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test module for CONFIG_DEBUG_VIRTUAL"); diff --git a/lib/test_kmod.c b/lib/test_kmod.c index ff9148969b9233ba7502b992b026d31e100be497..fba78d25e82569e57845d16b36c271d0848099b0 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -924,7 +924,7 @@ static int test_dev_config_update_uint_range(struct kmod_test_device *test_dev, if (ret) return ret; - if (new < min || new > max || new > UINT_MAX) + if (new < min || new > max) return -EINVAL; mutex_lock(&test_dev->config_mutex); @@ -946,7 +946,7 @@ static int test_dev_config_update_int(struct kmod_test_device *test_dev, if (ret) return ret; - if (new > INT_MAX || new < INT_MIN) + if (new < INT_MIN || new > INT_MAX) return -EINVAL; mutex_lock(&test_dev->config_mutex); diff --git a/mm/Kconfig b/mm/Kconfig index 0ded10a22639472daf48453d4f6754fc5a626c4b..9c4bdddd80c2123ac522c7d6c08453cca6c8445f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -262,6 +262,9 @@ config MIGRATION config ARCH_ENABLE_HUGEPAGE_MIGRATION bool +config ARCH_ENABLE_THP_MIGRATION + bool + config PHYS_ADDR_T_64BIT def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT @@ -673,7 +676,7 @@ config ARCH_HAS_ZONE_DEVICE bool config ZONE_DEVICE - bool "Device memory (pmem, etc...) hotplug support" + bool "Device memory (pmem, HMM, etc...) hotplug support" depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE depends on SPARSEMEM_VMEMMAP @@ -689,6 +692,55 @@ config ZONE_DEVICE If FS_DAX is enabled, then say Y. +config ARCH_HAS_HMM + bool + default y + depends on (X86_64 || PPC64) + depends on ZONE_DEVICE + depends on MMU && 64BIT + depends on MEMORY_HOTPLUG + depends on MEMORY_HOTREMOVE + depends on SPARSEMEM_VMEMMAP + +config MIGRATE_VMA_HELPER + bool + +config HMM + bool + select MIGRATE_VMA_HELPER + +config HMM_MIRROR + bool "HMM mirror CPU page table into a device page table" + depends on ARCH_HAS_HMM + select MMU_NOTIFIER + select HMM + help + Select HMM_MIRROR if you want to mirror range of the CPU page table of a + process into a device page table. Here, mirror means "keep synchronized". + Prerequisites: the device must provide the ability to write-protect its + page tables (at PAGE_SIZE granularity), and must be able to recover from + the resulting potential page faults. + +config DEVICE_PRIVATE + bool "Unaddressable device memory (GPU memory, ...)" + depends on ARCH_HAS_HMM + select HMM + + help + Allows creation of struct pages to represent unaddressable device + memory; i.e., memory that is only accessible from the device (or + group of devices). You likely also want to select HMM_MIRROR. + +config DEVICE_PUBLIC + bool "Addressable device memory (like GPU memory)" + depends on ARCH_HAS_HMM + select HMM + + help + Allows creation of struct pages to represent addressable device + memory; i.e., memory that is accessible from both the device and + the CPU + config FRAME_VECTOR bool diff --git a/mm/Makefile b/mm/Makefile index 411bd24d4a7c78dabc1f508201feb5795a8c5ad8..e3ac3aeb533badc264dac11579192acb101a693c 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -104,3 +104,4 @@ obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o +obj-$(CONFIG_HMM) += hmm.o diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index b06d9fe23a28c14f71c3263daaa84965dadeee45..68d28924ba79d66cec76535f19772de6dc429522 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -139,6 +139,14 @@ int balloon_page_migrate(struct address_space *mapping, { struct balloon_dev_info *balloon = balloon_page_device(page); + /* + * We can not easily support the no copy case here so ignore it as it + * is unlikely to be use with ballon pages. See include/linux/hmm.h for + * user of the MIGRATE_SYNC_NO_COPY mode. + */ + if (mode == MIGRATE_SYNC_NO_COPY) + return -EINVAL; + VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); diff --git a/mm/fadvise.c b/mm/fadvise.c index a430131125815803ba25a7014917c6118087dd5c..702f239cd6db534b20276bce62ae1d2176bdb664 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -52,7 +52,9 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) goto out; } - if (IS_DAX(inode)) { + bdi = inode_to_bdi(mapping->host); + + if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) { switch (advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_RANDOM: @@ -75,8 +77,6 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) else endbyte--; /* inclusive */ - bdi = inode_to_bdi(mapping->host); - switch (advice) { case POSIX_FADV_NORMAL: f.file->f_ra.ra_pages = bdi->ra_pages; diff --git a/mm/gup.c b/mm/gup.c index 33d651deeae2a5c1aaa47c03b40110ae4330f781..b2b4d4263768d82d61b99d2a2125251345fa778c 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -234,6 +234,16 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, return page; return no_page_table(vma, flags); } +retry: + if (!pmd_present(*pmd)) { + if (likely(!(flags & FOLL_MIGRATION))) + return no_page_table(vma, flags); + VM_BUG_ON(thp_migration_supported() && + !is_pmd_migration_entry(*pmd)); + if (is_pmd_migration_entry(*pmd)) + pmd_migration_entry_wait(mm, pmd); + goto retry; + } if (pmd_devmap(*pmd)) { ptl = pmd_lock(mm, pmd); page = follow_devmap_pmd(vma, address, pmd, flags); @@ -247,7 +257,15 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) return no_page_table(vma, flags); +retry_locked: ptl = pmd_lock(mm, pmd); + if (unlikely(!pmd_present(*pmd))) { + spin_unlock(ptl); + if (likely(!(flags & FOLL_MIGRATION))) + return no_page_table(vma, flags); + pmd_migration_entry_wait(mm, pmd); + goto retry_locked; + } if (unlikely(!pmd_trans_huge(*pmd))) { spin_unlock(ptl); return follow_page_pte(vma, address, pmd, flags); @@ -424,7 +442,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, pud = pud_offset(p4d, address); BUG_ON(pud_none(*pud)); pmd = pmd_offset(pud, address); - if (pmd_none(*pmd)) + if (!pmd_present(*pmd)) return -EFAULT; VM_BUG_ON(pmd_trans_huge(*pmd)); pte = pte_offset_map(pmd, address); @@ -438,6 +456,13 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte))) goto unmap; *page = pte_page(*pte); + + /* + * This should never happen (a device public page in the gate + * area). + */ + if (is_device_public_page(*page)) + goto unmap; } get_page(*page); out: @@ -1534,7 +1559,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pmd_t pmd = READ_ONCE(*pmdp); next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) + if (!pmd_present(pmd)) return 0; if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) { diff --git a/mm/hmm.c b/mm/hmm.c new file mode 100644 index 0000000000000000000000000000000000000000..a88a847bccba700a1c0f51d0a819dcf365995ed1 --- /dev/null +++ b/mm/hmm.c @@ -0,0 +1,1257 @@ +/* + * Copyright 2013 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: Jérôme Glisse + */ +/* + * Refer to include/linux/hmm.h for information about heterogeneous memory + * management or HMM for short. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT) + +#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC) +/* + * Device private memory see HMM (Documentation/vm/hmm.txt) or hmm.h + */ +DEFINE_STATIC_KEY_FALSE(device_private_key); +EXPORT_SYMBOL(device_private_key); +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ + + +#if IS_ENABLED(CONFIG_HMM_MIRROR) +static const struct mmu_notifier_ops hmm_mmu_notifier_ops; + +/* + * struct hmm - HMM per mm struct + * + * @mm: mm struct this HMM struct is bound to + * @lock: lock protecting ranges list + * @sequence: we track updates to the CPU page table with a sequence number + * @ranges: list of range being snapshotted + * @mirrors: list of mirrors for this mm + * @mmu_notifier: mmu notifier to track updates to CPU page table + * @mirrors_sem: read/write semaphore protecting the mirrors list + */ +struct hmm { + struct mm_struct *mm; + spinlock_t lock; + atomic_t sequence; + struct list_head ranges; + struct list_head mirrors; + struct mmu_notifier mmu_notifier; + struct rw_semaphore mirrors_sem; +}; + +/* + * hmm_register - register HMM against an mm (HMM internal) + * + * @mm: mm struct to attach to + * + * This is not intended to be used directly by device drivers. It allocates an + * HMM struct if mm does not have one, and initializes it. + */ +static struct hmm *hmm_register(struct mm_struct *mm) +{ + struct hmm *hmm = READ_ONCE(mm->hmm); + bool cleanup = false; + + /* + * The hmm struct can only be freed once the mm_struct goes away, + * hence we should always have pre-allocated an new hmm struct + * above. + */ + if (hmm) + return hmm; + + hmm = kmalloc(sizeof(*hmm), GFP_KERNEL); + if (!hmm) + return NULL; + INIT_LIST_HEAD(&hmm->mirrors); + init_rwsem(&hmm->mirrors_sem); + atomic_set(&hmm->sequence, 0); + hmm->mmu_notifier.ops = NULL; + INIT_LIST_HEAD(&hmm->ranges); + spin_lock_init(&hmm->lock); + hmm->mm = mm; + + /* + * We should only get here if hold the mmap_sem in write mode ie on + * registration of first mirror through hmm_mirror_register() + */ + hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops; + if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) { + kfree(hmm); + return NULL; + } + + spin_lock(&mm->page_table_lock); + if (!mm->hmm) + mm->hmm = hmm; + else + cleanup = true; + spin_unlock(&mm->page_table_lock); + + if (cleanup) { + mmu_notifier_unregister(&hmm->mmu_notifier, mm); + kfree(hmm); + } + + return mm->hmm; +} + +void hmm_mm_destroy(struct mm_struct *mm) +{ + kfree(mm->hmm); +} + +static void hmm_invalidate_range(struct hmm *hmm, + enum hmm_update_type action, + unsigned long start, + unsigned long end) +{ + struct hmm_mirror *mirror; + struct hmm_range *range; + + spin_lock(&hmm->lock); + list_for_each_entry(range, &hmm->ranges, list) { + unsigned long addr, idx, npages; + + if (end < range->start || start >= range->end) + continue; + + range->valid = false; + addr = max(start, range->start); + idx = (addr - range->start) >> PAGE_SHIFT; + npages = (min(range->end, end) - addr) >> PAGE_SHIFT; + memset(&range->pfns[idx], 0, sizeof(*range->pfns) * npages); + } + spin_unlock(&hmm->lock); + + down_read(&hmm->mirrors_sem); + list_for_each_entry(mirror, &hmm->mirrors, list) + mirror->ops->sync_cpu_device_pagetables(mirror, action, + start, end); + up_read(&hmm->mirrors_sem); +} + +static void hmm_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct hmm *hmm = mm->hmm; + + VM_BUG_ON(!hmm); + + atomic_inc(&hmm->sequence); +} + +static void hmm_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct hmm *hmm = mm->hmm; + + VM_BUG_ON(!hmm); + + hmm_invalidate_range(mm->hmm, HMM_UPDATE_INVALIDATE, start, end); +} + +static const struct mmu_notifier_ops hmm_mmu_notifier_ops = { + .invalidate_range_start = hmm_invalidate_range_start, + .invalidate_range_end = hmm_invalidate_range_end, +}; + +/* + * hmm_mirror_register() - register a mirror against an mm + * + * @mirror: new mirror struct to register + * @mm: mm to register against + * + * To start mirroring a process address space, the device driver must register + * an HMM mirror struct. + * + * THE mm->mmap_sem MUST BE HELD IN WRITE MODE ! + */ +int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm) +{ + /* Sanity check */ + if (!mm || !mirror || !mirror->ops) + return -EINVAL; + + mirror->hmm = hmm_register(mm); + if (!mirror->hmm) + return -ENOMEM; + + down_write(&mirror->hmm->mirrors_sem); + list_add(&mirror->list, &mirror->hmm->mirrors); + up_write(&mirror->hmm->mirrors_sem); + + return 0; +} +EXPORT_SYMBOL(hmm_mirror_register); + +/* + * hmm_mirror_unregister() - unregister a mirror + * + * @mirror: new mirror struct to register + * + * Stop mirroring a process address space, and cleanup. + */ +void hmm_mirror_unregister(struct hmm_mirror *mirror) +{ + struct hmm *hmm = mirror->hmm; + + down_write(&hmm->mirrors_sem); + list_del(&mirror->list); + up_write(&hmm->mirrors_sem); +} +EXPORT_SYMBOL(hmm_mirror_unregister); + +struct hmm_vma_walk { + struct hmm_range *range; + unsigned long last; + bool fault; + bool block; + bool write; +}; + +static int hmm_vma_do_fault(struct mm_walk *walk, + unsigned long addr, + hmm_pfn_t *pfn) +{ + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_REMOTE; + struct hmm_vma_walk *hmm_vma_walk = walk->private; + struct vm_area_struct *vma = walk->vma; + int r; + + flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY; + flags |= hmm_vma_walk->write ? FAULT_FLAG_WRITE : 0; + r = handle_mm_fault(vma, addr, flags); + if (r & VM_FAULT_RETRY) + return -EBUSY; + if (r & VM_FAULT_ERROR) { + *pfn = HMM_PFN_ERROR; + return -EFAULT; + } + + return -EAGAIN; +} + +static void hmm_pfns_special(hmm_pfn_t *pfns, + unsigned long addr, + unsigned long end) +{ + for (; addr < end; addr += PAGE_SIZE, pfns++) + *pfns = HMM_PFN_SPECIAL; +} + +static int hmm_pfns_bad(unsigned long addr, + unsigned long end, + struct mm_walk *walk) +{ + struct hmm_range *range = walk->private; + hmm_pfn_t *pfns = range->pfns; + unsigned long i; + + i = (addr - range->start) >> PAGE_SHIFT; + for (; addr < end; addr += PAGE_SIZE, i++) + pfns[i] = HMM_PFN_ERROR; + + return 0; +} + +static void hmm_pfns_clear(hmm_pfn_t *pfns, + unsigned long addr, + unsigned long end) +{ + for (; addr < end; addr += PAGE_SIZE, pfns++) + *pfns = 0; +} + +static int hmm_vma_walk_hole(unsigned long addr, + unsigned long end, + struct mm_walk *walk) +{ + struct hmm_vma_walk *hmm_vma_walk = walk->private; + struct hmm_range *range = hmm_vma_walk->range; + hmm_pfn_t *pfns = range->pfns; + unsigned long i; + + hmm_vma_walk->last = addr; + i = (addr - range->start) >> PAGE_SHIFT; + for (; addr < end; addr += PAGE_SIZE, i++) { + pfns[i] = HMM_PFN_EMPTY; + if (hmm_vma_walk->fault) { + int ret; + + ret = hmm_vma_do_fault(walk, addr, &pfns[i]); + if (ret != -EAGAIN) + return ret; + } + } + + return hmm_vma_walk->fault ? -EAGAIN : 0; +} + +static int hmm_vma_walk_clear(unsigned long addr, + unsigned long end, + struct mm_walk *walk) +{ + struct hmm_vma_walk *hmm_vma_walk = walk->private; + struct hmm_range *range = hmm_vma_walk->range; + hmm_pfn_t *pfns = range->pfns; + unsigned long i; + + hmm_vma_walk->last = addr; + i = (addr - range->start) >> PAGE_SHIFT; + for (; addr < end; addr += PAGE_SIZE, i++) { + pfns[i] = 0; + if (hmm_vma_walk->fault) { + int ret; + + ret = hmm_vma_do_fault(walk, addr, &pfns[i]); + if (ret != -EAGAIN) + return ret; + } + } + + return hmm_vma_walk->fault ? -EAGAIN : 0; +} + +static int hmm_vma_walk_pmd(pmd_t *pmdp, + unsigned long start, + unsigned long end, + struct mm_walk *walk) +{ + struct hmm_vma_walk *hmm_vma_walk = walk->private; + struct hmm_range *range = hmm_vma_walk->range; + struct vm_area_struct *vma = walk->vma; + hmm_pfn_t *pfns = range->pfns; + unsigned long addr = start, i; + bool write_fault; + hmm_pfn_t flag; + pte_t *ptep; + + i = (addr - range->start) >> PAGE_SHIFT; + flag = vma->vm_flags & VM_READ ? HMM_PFN_READ : 0; + write_fault = hmm_vma_walk->fault & hmm_vma_walk->write; + +again: + if (pmd_none(*pmdp)) + return hmm_vma_walk_hole(start, end, walk); + + if (pmd_huge(*pmdp) && vma->vm_flags & VM_HUGETLB) + return hmm_pfns_bad(start, end, walk); + + if (pmd_devmap(*pmdp) || pmd_trans_huge(*pmdp)) { + unsigned long pfn; + pmd_t pmd; + + /* + * No need to take pmd_lock here, even if some other threads + * is splitting the huge pmd we will get that event through + * mmu_notifier callback. + * + * So just read pmd value and check again its a transparent + * huge or device mapping one and compute corresponding pfn + * values. + */ + pmd = pmd_read_atomic(pmdp); + barrier(); + if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd)) + goto again; + if (pmd_protnone(pmd)) + return hmm_vma_walk_clear(start, end, walk); + + if (write_fault && !pmd_write(pmd)) + return hmm_vma_walk_clear(start, end, walk); + + pfn = pmd_pfn(pmd) + pte_index(addr); + flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0; + for (; addr < end; addr += PAGE_SIZE, i++, pfn++) + pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag; + return 0; + } + + if (pmd_bad(*pmdp)) + return hmm_pfns_bad(start, end, walk); + + ptep = pte_offset_map(pmdp, addr); + for (; addr < end; addr += PAGE_SIZE, ptep++, i++) { + pte_t pte = *ptep; + + pfns[i] = 0; + + if (pte_none(pte)) { + pfns[i] = HMM_PFN_EMPTY; + if (hmm_vma_walk->fault) + goto fault; + continue; + } + + if (!pte_present(pte)) { + swp_entry_t entry; + + if (!non_swap_entry(entry)) { + if (hmm_vma_walk->fault) + goto fault; + continue; + } + + entry = pte_to_swp_entry(pte); + + /* + * This is a special swap entry, ignore migration, use + * device and report anything else as error. + */ + if (is_device_private_entry(entry)) { + pfns[i] = hmm_pfn_t_from_pfn(swp_offset(entry)); + if (is_write_device_private_entry(entry)) { + pfns[i] |= HMM_PFN_WRITE; + } else if (write_fault) + goto fault; + pfns[i] |= HMM_PFN_DEVICE_UNADDRESSABLE; + pfns[i] |= flag; + } else if (is_migration_entry(entry)) { + if (hmm_vma_walk->fault) { + pte_unmap(ptep); + hmm_vma_walk->last = addr; + migration_entry_wait(vma->vm_mm, + pmdp, addr); + return -EAGAIN; + } + continue; + } else { + /* Report error for everything else */ + pfns[i] = HMM_PFN_ERROR; + } + continue; + } + + if (write_fault && !pte_write(pte)) + goto fault; + + pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag; + pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0; + continue; + +fault: + pte_unmap(ptep); + /* Fault all pages in range */ + return hmm_vma_walk_clear(start, end, walk); + } + pte_unmap(ptep - 1); + + return 0; +} + +/* + * hmm_vma_get_pfns() - snapshot CPU page table for a range of virtual addresses + * @vma: virtual memory area containing the virtual address range + * @range: used to track snapshot validity + * @start: range virtual start address (inclusive) + * @end: range virtual end address (exclusive) + * @entries: array of hmm_pfn_t: provided by the caller, filled in by function + * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, 0 success + * + * This snapshots the CPU page table for a range of virtual addresses. Snapshot + * validity is tracked by range struct. See hmm_vma_range_done() for further + * information. + * + * The range struct is initialized here. It tracks the CPU page table, but only + * if the function returns success (0), in which case the caller must then call + * hmm_vma_range_done() to stop CPU page table update tracking on this range. + * + * NOT CALLING hmm_vma_range_done() IF FUNCTION RETURNS 0 WILL LEAD TO SERIOUS + * MEMORY CORRUPTION ! YOU HAVE BEEN WARNED ! + */ +int hmm_vma_get_pfns(struct vm_area_struct *vma, + struct hmm_range *range, + unsigned long start, + unsigned long end, + hmm_pfn_t *pfns) +{ + struct hmm_vma_walk hmm_vma_walk; + struct mm_walk mm_walk; + struct hmm *hmm; + + /* FIXME support hugetlb fs */ + if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) { + hmm_pfns_special(pfns, start, end); + return -EINVAL; + } + + /* Sanity check, this really should not happen ! */ + if (start < vma->vm_start || start >= vma->vm_end) + return -EINVAL; + if (end < vma->vm_start || end > vma->vm_end) + return -EINVAL; + + hmm = hmm_register(vma->vm_mm); + if (!hmm) + return -ENOMEM; + /* Caller must have registered a mirror, via hmm_mirror_register() ! */ + if (!hmm->mmu_notifier.ops) + return -EINVAL; + + /* Initialize range to track CPU page table update */ + range->start = start; + range->pfns = pfns; + range->end = end; + spin_lock(&hmm->lock); + range->valid = true; + list_add_rcu(&range->list, &hmm->ranges); + spin_unlock(&hmm->lock); + + hmm_vma_walk.fault = false; + hmm_vma_walk.range = range; + mm_walk.private = &hmm_vma_walk; + + mm_walk.vma = vma; + mm_walk.mm = vma->vm_mm; + mm_walk.pte_entry = NULL; + mm_walk.test_walk = NULL; + mm_walk.hugetlb_entry = NULL; + mm_walk.pmd_entry = hmm_vma_walk_pmd; + mm_walk.pte_hole = hmm_vma_walk_hole; + + walk_page_range(start, end, &mm_walk); + return 0; +} +EXPORT_SYMBOL(hmm_vma_get_pfns); + +/* + * hmm_vma_range_done() - stop tracking change to CPU page table over a range + * @vma: virtual memory area containing the virtual address range + * @range: range being tracked + * Returns: false if range data has been invalidated, true otherwise + * + * Range struct is used to track updates to the CPU page table after a call to + * either hmm_vma_get_pfns() or hmm_vma_fault(). Once the device driver is done + * using the data, or wants to lock updates to the data it got from those + * functions, it must call the hmm_vma_range_done() function, which will then + * stop tracking CPU page table updates. + * + * Note that device driver must still implement general CPU page table update + * tracking either by using hmm_mirror (see hmm_mirror_register()) or by using + * the mmu_notifier API directly. + * + * CPU page table update tracking done through hmm_range is only temporary and + * to be used while trying to duplicate CPU page table contents for a range of + * virtual addresses. + * + * There are two ways to use this : + * again: + * hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...); + * trans = device_build_page_table_update_transaction(pfns); + * device_page_table_lock(); + * if (!hmm_vma_range_done(vma, range)) { + * device_page_table_unlock(); + * goto again; + * } + * device_commit_transaction(trans); + * device_page_table_unlock(); + * + * Or: + * hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...); + * device_page_table_lock(); + * hmm_vma_range_done(vma, range); + * device_update_page_table(pfns); + * device_page_table_unlock(); + */ +bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range) +{ + unsigned long npages = (range->end - range->start) >> PAGE_SHIFT; + struct hmm *hmm; + + if (range->end <= range->start) { + BUG(); + return false; + } + + hmm = hmm_register(vma->vm_mm); + if (!hmm) { + memset(range->pfns, 0, sizeof(*range->pfns) * npages); + return false; + } + + spin_lock(&hmm->lock); + list_del_rcu(&range->list); + spin_unlock(&hmm->lock); + + return range->valid; +} +EXPORT_SYMBOL(hmm_vma_range_done); + +/* + * hmm_vma_fault() - try to fault some address in a virtual address range + * @vma: virtual memory area containing the virtual address range + * @range: use to track pfns array content validity + * @start: fault range virtual start address (inclusive) + * @end: fault range virtual end address (exclusive) + * @pfns: array of hmm_pfn_t, only entry with fault flag set will be faulted + * @write: is it a write fault + * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem) + * Returns: 0 success, error otherwise (-EAGAIN means mmap_sem have been drop) + * + * This is similar to a regular CPU page fault except that it will not trigger + * any memory migration if the memory being faulted is not accessible by CPUs. + * + * On error, for one virtual address in the range, the function will set the + * hmm_pfn_t error flag for the corresponding pfn entry. + * + * Expected use pattern: + * retry: + * down_read(&mm->mmap_sem); + * // Find vma and address device wants to fault, initialize hmm_pfn_t + * // array accordingly + * ret = hmm_vma_fault(vma, start, end, pfns, allow_retry); + * switch (ret) { + * case -EAGAIN: + * hmm_vma_range_done(vma, range); + * // You might want to rate limit or yield to play nicely, you may + * // also commit any valid pfn in the array assuming that you are + * // getting true from hmm_vma_range_monitor_end() + * goto retry; + * case 0: + * break; + * default: + * // Handle error ! + * up_read(&mm->mmap_sem) + * return; + * } + * // Take device driver lock that serialize device page table update + * driver_lock_device_page_table_update(); + * hmm_vma_range_done(vma, range); + * // Commit pfns we got from hmm_vma_fault() + * driver_unlock_device_page_table_update(); + * up_read(&mm->mmap_sem) + * + * YOU MUST CALL hmm_vma_range_done() AFTER THIS FUNCTION RETURN SUCCESS (0) + * BEFORE FREEING THE range struct OR YOU WILL HAVE SERIOUS MEMORY CORRUPTION ! + * + * YOU HAVE BEEN WARNED ! + */ +int hmm_vma_fault(struct vm_area_struct *vma, + struct hmm_range *range, + unsigned long start, + unsigned long end, + hmm_pfn_t *pfns, + bool write, + bool block) +{ + struct hmm_vma_walk hmm_vma_walk; + struct mm_walk mm_walk; + struct hmm *hmm; + int ret; + + /* Sanity check, this really should not happen ! */ + if (start < vma->vm_start || start >= vma->vm_end) + return -EINVAL; + if (end < vma->vm_start || end > vma->vm_end) + return -EINVAL; + + hmm = hmm_register(vma->vm_mm); + if (!hmm) { + hmm_pfns_clear(pfns, start, end); + return -ENOMEM; + } + /* Caller must have registered a mirror using hmm_mirror_register() */ + if (!hmm->mmu_notifier.ops) + return -EINVAL; + + /* Initialize range to track CPU page table update */ + range->start = start; + range->pfns = pfns; + range->end = end; + spin_lock(&hmm->lock); + range->valid = true; + list_add_rcu(&range->list, &hmm->ranges); + spin_unlock(&hmm->lock); + + /* FIXME support hugetlb fs */ + if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) { + hmm_pfns_special(pfns, start, end); + return 0; + } + + hmm_vma_walk.fault = true; + hmm_vma_walk.write = write; + hmm_vma_walk.block = block; + hmm_vma_walk.range = range; + mm_walk.private = &hmm_vma_walk; + hmm_vma_walk.last = range->start; + + mm_walk.vma = vma; + mm_walk.mm = vma->vm_mm; + mm_walk.pte_entry = NULL; + mm_walk.test_walk = NULL; + mm_walk.hugetlb_entry = NULL; + mm_walk.pmd_entry = hmm_vma_walk_pmd; + mm_walk.pte_hole = hmm_vma_walk_hole; + + do { + ret = walk_page_range(start, end, &mm_walk); + start = hmm_vma_walk.last; + } while (ret == -EAGAIN); + + if (ret) { + unsigned long i; + + i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT; + hmm_pfns_clear(&pfns[i], hmm_vma_walk.last, end); + hmm_vma_range_done(vma, range); + } + return ret; +} +EXPORT_SYMBOL(hmm_vma_fault); +#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */ + + +#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC) +struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma, + unsigned long addr) +{ + struct page *page; + + page = alloc_page_vma(GFP_HIGHUSER, vma, addr); + if (!page) + return NULL; + lock_page(page); + return page; +} +EXPORT_SYMBOL(hmm_vma_alloc_locked_page); + + +static void hmm_devmem_ref_release(struct percpu_ref *ref) +{ + struct hmm_devmem *devmem; + + devmem = container_of(ref, struct hmm_devmem, ref); + complete(&devmem->completion); +} + +static void hmm_devmem_ref_exit(void *data) +{ + struct percpu_ref *ref = data; + struct hmm_devmem *devmem; + + devmem = container_of(ref, struct hmm_devmem, ref); + percpu_ref_exit(ref); + devm_remove_action(devmem->device, &hmm_devmem_ref_exit, data); +} + +static void hmm_devmem_ref_kill(void *data) +{ + struct percpu_ref *ref = data; + struct hmm_devmem *devmem; + + devmem = container_of(ref, struct hmm_devmem, ref); + percpu_ref_kill(ref); + wait_for_completion(&devmem->completion); + devm_remove_action(devmem->device, &hmm_devmem_ref_kill, data); +} + +static int hmm_devmem_fault(struct vm_area_struct *vma, + unsigned long addr, + const struct page *page, + unsigned int flags, + pmd_t *pmdp) +{ + struct hmm_devmem *devmem = page->pgmap->data; + + return devmem->ops->fault(devmem, vma, addr, page, flags, pmdp); +} + +static void hmm_devmem_free(struct page *page, void *data) +{ + struct hmm_devmem *devmem = data; + + devmem->ops->free(devmem, page); +} + +static DEFINE_MUTEX(hmm_devmem_lock); +static RADIX_TREE(hmm_devmem_radix, GFP_KERNEL); + +static void hmm_devmem_radix_release(struct resource *resource) +{ + resource_size_t key, align_start, align_size, align_end; + + align_start = resource->start & ~(PA_SECTION_SIZE - 1); + align_size = ALIGN(resource_size(resource), PA_SECTION_SIZE); + align_end = align_start + align_size - 1; + + mutex_lock(&hmm_devmem_lock); + for (key = resource->start; + key <= resource->end; + key += PA_SECTION_SIZE) + radix_tree_delete(&hmm_devmem_radix, key >> PA_SECTION_SHIFT); + mutex_unlock(&hmm_devmem_lock); +} + +static void hmm_devmem_release(struct device *dev, void *data) +{ + struct hmm_devmem *devmem = data; + struct resource *resource = devmem->resource; + unsigned long start_pfn, npages; + struct zone *zone; + struct page *page; + + if (percpu_ref_tryget_live(&devmem->ref)) { + dev_WARN(dev, "%s: page mapping is still live!\n", __func__); + percpu_ref_put(&devmem->ref); + } + + /* pages are dead and unused, undo the arch mapping */ + start_pfn = (resource->start & ~(PA_SECTION_SIZE - 1)) >> PAGE_SHIFT; + npages = ALIGN(resource_size(resource), PA_SECTION_SIZE) >> PAGE_SHIFT; + + page = pfn_to_page(start_pfn); + zone = page_zone(page); + + mem_hotplug_begin(); + if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) + __remove_pages(zone, start_pfn, npages); + else + arch_remove_memory(start_pfn << PAGE_SHIFT, + npages << PAGE_SHIFT); + mem_hotplug_done(); + + hmm_devmem_radix_release(resource); +} + +static struct hmm_devmem *hmm_devmem_find(resource_size_t phys) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + + return radix_tree_lookup(&hmm_devmem_radix, phys >> PA_SECTION_SHIFT); +} + +static int hmm_devmem_pages_create(struct hmm_devmem *devmem) +{ + resource_size_t key, align_start, align_size, align_end; + struct device *device = devmem->device; + int ret, nid, is_ram; + unsigned long pfn; + + align_start = devmem->resource->start & ~(PA_SECTION_SIZE - 1); + align_size = ALIGN(devmem->resource->start + + resource_size(devmem->resource), + PA_SECTION_SIZE) - align_start; + + is_ram = region_intersects(align_start, align_size, + IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE); + if (is_ram == REGION_MIXED) { + WARN_ONCE(1, "%s attempted on mixed region %pr\n", + __func__, devmem->resource); + return -ENXIO; + } + if (is_ram == REGION_INTERSECTS) + return -ENXIO; + + if (devmem->resource->desc == IORES_DESC_DEVICE_PUBLIC_MEMORY) + devmem->pagemap.type = MEMORY_DEVICE_PUBLIC; + else + devmem->pagemap.type = MEMORY_DEVICE_PRIVATE; + + devmem->pagemap.res = devmem->resource; + devmem->pagemap.page_fault = hmm_devmem_fault; + devmem->pagemap.page_free = hmm_devmem_free; + devmem->pagemap.dev = devmem->device; + devmem->pagemap.ref = &devmem->ref; + devmem->pagemap.data = devmem; + + mutex_lock(&hmm_devmem_lock); + align_end = align_start + align_size - 1; + for (key = align_start; key <= align_end; key += PA_SECTION_SIZE) { + struct hmm_devmem *dup; + + rcu_read_lock(); + dup = hmm_devmem_find(key); + rcu_read_unlock(); + if (dup) { + dev_err(device, "%s: collides with mapping for %s\n", + __func__, dev_name(dup->device)); + mutex_unlock(&hmm_devmem_lock); + ret = -EBUSY; + goto error; + } + ret = radix_tree_insert(&hmm_devmem_radix, + key >> PA_SECTION_SHIFT, + devmem); + if (ret) { + dev_err(device, "%s: failed: %d\n", __func__, ret); + mutex_unlock(&hmm_devmem_lock); + goto error_radix; + } + } + mutex_unlock(&hmm_devmem_lock); + + nid = dev_to_node(device); + if (nid < 0) + nid = numa_mem_id(); + + mem_hotplug_begin(); + /* + * For device private memory we call add_pages() as we only need to + * allocate and initialize struct page for the device memory. More- + * over the device memory is un-accessible thus we do not want to + * create a linear mapping for the memory like arch_add_memory() + * would do. + * + * For device public memory, which is accesible by the CPU, we do + * want the linear mapping and thus use arch_add_memory(). + */ + if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC) + ret = arch_add_memory(nid, align_start, align_size, false); + else + ret = add_pages(nid, align_start >> PAGE_SHIFT, + align_size >> PAGE_SHIFT, false); + if (ret) { + mem_hotplug_done(); + goto error_add_memory; + } + move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE], + align_start >> PAGE_SHIFT, + align_size >> PAGE_SHIFT); + mem_hotplug_done(); + + for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) { + struct page *page = pfn_to_page(pfn); + + page->pgmap = &devmem->pagemap; + } + return 0; + +error_add_memory: + untrack_pfn(NULL, PHYS_PFN(align_start), align_size); +error_radix: + hmm_devmem_radix_release(devmem->resource); +error: + return ret; +} + +static int hmm_devmem_match(struct device *dev, void *data, void *match_data) +{ + struct hmm_devmem *devmem = data; + + return devmem->resource == match_data; +} + +static void hmm_devmem_pages_remove(struct hmm_devmem *devmem) +{ + devres_release(devmem->device, &hmm_devmem_release, + &hmm_devmem_match, devmem->resource); +} + +/* + * hmm_devmem_add() - hotplug ZONE_DEVICE memory for device memory + * + * @ops: memory event device driver callback (see struct hmm_devmem_ops) + * @device: device struct to bind the resource too + * @size: size in bytes of the device memory to add + * Returns: pointer to new hmm_devmem struct ERR_PTR otherwise + * + * This function first finds an empty range of physical address big enough to + * contain the new resource, and then hotplugs it as ZONE_DEVICE memory, which + * in turn allocates struct pages. It does not do anything beyond that; all + * events affecting the memory will go through the various callbacks provided + * by hmm_devmem_ops struct. + * + * Device driver should call this function during device initialization and + * is then responsible of memory management. HMM only provides helpers. + */ +struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops, + struct device *device, + unsigned long size) +{ + struct hmm_devmem *devmem; + resource_size_t addr; + int ret; + + static_branch_enable(&device_private_key); + + devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem), + GFP_KERNEL, dev_to_node(device)); + if (!devmem) + return ERR_PTR(-ENOMEM); + + init_completion(&devmem->completion); + devmem->pfn_first = -1UL; + devmem->pfn_last = -1UL; + devmem->resource = NULL; + devmem->device = device; + devmem->ops = ops; + + ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release, + 0, GFP_KERNEL); + if (ret) + goto error_percpu_ref; + + ret = devm_add_action(device, hmm_devmem_ref_exit, &devmem->ref); + if (ret) + goto error_devm_add_action; + + size = ALIGN(size, PA_SECTION_SIZE); + addr = min((unsigned long)iomem_resource.end, + (1UL << MAX_PHYSMEM_BITS) - 1); + addr = addr - size + 1UL; + + /* + * FIXME add a new helper to quickly walk resource tree and find free + * range + * + * FIXME what about ioport_resource resource ? + */ + for (; addr > size && addr >= iomem_resource.start; addr -= size) { + ret = region_intersects(addr, size, 0, IORES_DESC_NONE); + if (ret != REGION_DISJOINT) + continue; + + devmem->resource = devm_request_mem_region(device, addr, size, + dev_name(device)); + if (!devmem->resource) { + ret = -ENOMEM; + goto error_no_resource; + } + break; + } + if (!devmem->resource) { + ret = -ERANGE; + goto error_no_resource; + } + + devmem->resource->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY; + devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT; + devmem->pfn_last = devmem->pfn_first + + (resource_size(devmem->resource) >> PAGE_SHIFT); + + ret = hmm_devmem_pages_create(devmem); + if (ret) + goto error_pages; + + devres_add(device, devmem); + + ret = devm_add_action(device, hmm_devmem_ref_kill, &devmem->ref); + if (ret) { + hmm_devmem_remove(devmem); + return ERR_PTR(ret); + } + + return devmem; + +error_pages: + devm_release_mem_region(device, devmem->resource->start, + resource_size(devmem->resource)); +error_no_resource: +error_devm_add_action: + hmm_devmem_ref_kill(&devmem->ref); + hmm_devmem_ref_exit(&devmem->ref); +error_percpu_ref: + devres_free(devmem); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(hmm_devmem_add); + +struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops, + struct device *device, + struct resource *res) +{ + struct hmm_devmem *devmem; + int ret; + + if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY) + return ERR_PTR(-EINVAL); + + static_branch_enable(&device_private_key); + + devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem), + GFP_KERNEL, dev_to_node(device)); + if (!devmem) + return ERR_PTR(-ENOMEM); + + init_completion(&devmem->completion); + devmem->pfn_first = -1UL; + devmem->pfn_last = -1UL; + devmem->resource = res; + devmem->device = device; + devmem->ops = ops; + + ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release, + 0, GFP_KERNEL); + if (ret) + goto error_percpu_ref; + + ret = devm_add_action(device, hmm_devmem_ref_exit, &devmem->ref); + if (ret) + goto error_devm_add_action; + + + devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT; + devmem->pfn_last = devmem->pfn_first + + (resource_size(devmem->resource) >> PAGE_SHIFT); + + ret = hmm_devmem_pages_create(devmem); + if (ret) + goto error_devm_add_action; + + devres_add(device, devmem); + + ret = devm_add_action(device, hmm_devmem_ref_kill, &devmem->ref); + if (ret) { + hmm_devmem_remove(devmem); + return ERR_PTR(ret); + } + + return devmem; + +error_devm_add_action: + hmm_devmem_ref_kill(&devmem->ref); + hmm_devmem_ref_exit(&devmem->ref); +error_percpu_ref: + devres_free(devmem); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(hmm_devmem_add_resource); + +/* + * hmm_devmem_remove() - remove device memory (kill and free ZONE_DEVICE) + * + * @devmem: hmm_devmem struct use to track and manage the ZONE_DEVICE memory + * + * This will hot-unplug memory that was hotplugged by hmm_devmem_add on behalf + * of the device driver. It will free struct page and remove the resource that + * reserved the physical address range for this device memory. + */ +void hmm_devmem_remove(struct hmm_devmem *devmem) +{ + resource_size_t start, size; + struct device *device; + bool cdm = false; + + if (!devmem) + return; + + device = devmem->device; + start = devmem->resource->start; + size = resource_size(devmem->resource); + + cdm = devmem->resource->desc == IORES_DESC_DEVICE_PUBLIC_MEMORY; + hmm_devmem_ref_kill(&devmem->ref); + hmm_devmem_ref_exit(&devmem->ref); + hmm_devmem_pages_remove(devmem); + + if (!cdm) + devm_release_mem_region(device, start, size); +} +EXPORT_SYMBOL(hmm_devmem_remove); + +/* + * A device driver that wants to handle multiple devices memory through a + * single fake device can use hmm_device to do so. This is purely a helper + * and it is not needed to make use of any HMM functionality. + */ +#define HMM_DEVICE_MAX 256 + +static DECLARE_BITMAP(hmm_device_mask, HMM_DEVICE_MAX); +static DEFINE_SPINLOCK(hmm_device_lock); +static struct class *hmm_device_class; +static dev_t hmm_device_devt; + +static void hmm_device_release(struct device *device) +{ + struct hmm_device *hmm_device; + + hmm_device = container_of(device, struct hmm_device, device); + spin_lock(&hmm_device_lock); + clear_bit(hmm_device->minor, hmm_device_mask); + spin_unlock(&hmm_device_lock); + + kfree(hmm_device); +} + +struct hmm_device *hmm_device_new(void *drvdata) +{ + struct hmm_device *hmm_device; + + hmm_device = kzalloc(sizeof(*hmm_device), GFP_KERNEL); + if (!hmm_device) + return ERR_PTR(-ENOMEM); + + spin_lock(&hmm_device_lock); + hmm_device->minor = find_first_zero_bit(hmm_device_mask, HMM_DEVICE_MAX); + if (hmm_device->minor >= HMM_DEVICE_MAX) { + spin_unlock(&hmm_device_lock); + kfree(hmm_device); + return ERR_PTR(-EBUSY); + } + set_bit(hmm_device->minor, hmm_device_mask); + spin_unlock(&hmm_device_lock); + + dev_set_name(&hmm_device->device, "hmm_device%d", hmm_device->minor); + hmm_device->device.devt = MKDEV(MAJOR(hmm_device_devt), + hmm_device->minor); + hmm_device->device.release = hmm_device_release; + dev_set_drvdata(&hmm_device->device, drvdata); + hmm_device->device.class = hmm_device_class; + device_initialize(&hmm_device->device); + + return hmm_device; +} +EXPORT_SYMBOL(hmm_device_new); + +void hmm_device_put(struct hmm_device *hmm_device) +{ + put_device(&hmm_device->device); +} +EXPORT_SYMBOL(hmm_device_put); + +static int __init hmm_init(void) +{ + int ret; + + ret = alloc_chrdev_region(&hmm_device_devt, 0, + HMM_DEVICE_MAX, + "hmm_device"); + if (ret) + return ret; + + hmm_device_class = class_create(THIS_MODULE, "hmm_device"); + if (IS_ERR(hmm_device_class)) { + unregister_chrdev_region(hmm_device_devt, HMM_DEVICE_MAX); + return PTR_ERR(hmm_device_class); + } + return 0; +} + +device_initcall(hmm_init); +#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0b51e70e0a8b5918e4de281a7d379eb4ce8c4323..269b5df58543e44d6a283c6268036f010aad37c2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -928,6 +928,25 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ret = -EAGAIN; pmd = *src_pmd; + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION + if (unlikely(is_swap_pmd(pmd))) { + swp_entry_t entry = pmd_to_swp_entry(pmd); + + VM_BUG_ON(!is_pmd_migration_entry(pmd)); + if (is_write_migration_entry(entry)) { + make_migration_entry_read(&entry); + pmd = swp_entry_to_pmd(entry); + if (pmd_swp_soft_dirty(*src_pmd)) + pmd = pmd_swp_mksoft_dirty(pmd); + set_pmd_at(src_mm, addr, src_pmd, pmd); + } + set_pmd_at(dst_mm, addr, dst_pmd, pmd); + ret = 0; + goto out_unlock; + } +#endif + if (unlikely(!pmd_trans_huge(pmd))) { pte_free(dst_mm, pgtable); goto out_unlock; @@ -1599,6 +1618,12 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (is_huge_zero_pmd(orig_pmd)) goto out; + if (unlikely(!pmd_present(orig_pmd))) { + VM_BUG_ON(thp_migration_supported() && + !is_pmd_migration_entry(orig_pmd)); + goto out; + } + page = pmd_page(orig_pmd); /* * If other processes are mapping this page, we couldn't discard @@ -1684,10 +1709,24 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, spin_unlock(ptl); tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE); } else { - struct page *page = pmd_page(orig_pmd); - page_remove_rmap(page, true); - VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); - VM_BUG_ON_PAGE(!PageHead(page), page); + struct page *page = NULL; + int flush_needed = 1; + + if (pmd_present(orig_pmd)) { + page = pmd_page(orig_pmd); + page_remove_rmap(page, true); + VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); + VM_BUG_ON_PAGE(!PageHead(page), page); + } else if (thp_migration_supported()) { + swp_entry_t entry; + + VM_BUG_ON(!is_pmd_migration_entry(orig_pmd)); + entry = pmd_to_swp_entry(orig_pmd); + page = pfn_to_page(swp_offset(entry)); + flush_needed = 0; + } else + WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!"); + if (PageAnon(page)) { zap_deposited_table(tlb->mm, pmd); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); @@ -1696,8 +1735,10 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, zap_deposited_table(tlb->mm, pmd); add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR); } + spin_unlock(ptl); - tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE); + if (flush_needed) + tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE); } return 1; } @@ -1717,6 +1758,17 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, } #endif +static pmd_t move_soft_dirty_pmd(pmd_t pmd) +{ +#ifdef CONFIG_MEM_SOFT_DIRTY + if (unlikely(is_pmd_migration_entry(pmd))) + pmd = pmd_swp_mksoft_dirty(pmd); + else if (pmd_present(pmd)) + pmd = pmd_mksoft_dirty(pmd); +#endif + return pmd; +} + bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) @@ -1759,7 +1811,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); pgtable_trans_huge_deposit(mm, new_pmd, pgtable); } - set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); + pmd = move_soft_dirty_pmd(pmd); + set_pmd_at(mm, new_addr, new_pmd, pmd); if (new_ptl != old_ptl) spin_unlock(new_ptl); if (force_flush) @@ -1794,6 +1847,27 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, preserve_write = prot_numa && pmd_write(*pmd); ret = 1; +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION + if (is_swap_pmd(*pmd)) { + swp_entry_t entry = pmd_to_swp_entry(*pmd); + + VM_BUG_ON(!is_pmd_migration_entry(*pmd)); + if (is_write_migration_entry(entry)) { + pmd_t newpmd; + /* + * A protection check is difficult so + * just be safe and disable write + */ + make_migration_entry_read(&entry); + newpmd = swp_entry_to_pmd(entry); + if (pmd_swp_soft_dirty(*pmd)) + newpmd = pmd_swp_mksoft_dirty(newpmd); + set_pmd_at(mm, addr, pmd, newpmd); + } + goto unlock; + } +#endif + /* * Avoid trapping faults against the zero page. The read-only * data is likely to be read-cached on the local CPU and @@ -1859,7 +1933,8 @@ spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { spinlock_t *ptl; ptl = pmd_lock(vma->vm_mm, pmd); - if (likely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd))) + if (likely(is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || + pmd_devmap(*pmd))) return ptl; spin_unlock(ptl); return NULL; @@ -1977,14 +2052,15 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, struct page *page; pgtable_t pgtable; pmd_t _pmd; - bool young, write, dirty, soft_dirty; + bool young, write, dirty, soft_dirty, pmd_migration = false; unsigned long addr; int i; VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); VM_BUG_ON_VMA(vma->vm_start > haddr, vma); VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma); - VM_BUG_ON(!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)); + VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd) + && !pmd_devmap(*pmd)); count_vm_event(THP_SPLIT_PMD); @@ -2009,7 +2085,16 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, return __split_huge_zero_page_pmd(vma, haddr, pmd); } - page = pmd_page(*pmd); +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION + pmd_migration = is_pmd_migration_entry(*pmd); + if (pmd_migration) { + swp_entry_t entry; + + entry = pmd_to_swp_entry(*pmd); + page = pfn_to_page(swp_offset(entry)); + } else +#endif + page = pmd_page(*pmd); VM_BUG_ON_PAGE(!page_count(page), page); page_ref_add(page, HPAGE_PMD_NR - 1); write = pmd_write(*pmd); @@ -2028,7 +2113,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, * transferred to avoid any possibility of altering * permissions across VMAs. */ - if (freeze) { + if (freeze || pmd_migration) { swp_entry_t swp_entry; swp_entry = make_migration_entry(page + i, write); entry = swp_entry_to_pte(swp_entry); @@ -2127,7 +2212,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, page = pmd_page(*pmd); if (PageMlocked(page)) clear_page_mlock(page); - } else if (!pmd_devmap(*pmd)) + } else if (!(pmd_devmap(*pmd) || is_pmd_migration_entry(*pmd))) goto out; __split_huge_pmd_locked(vma, pmd, haddr, freeze); out: @@ -2210,7 +2295,7 @@ static void freeze_page(struct page *page) VM_BUG_ON_PAGE(!PageHead(page), page); if (PageAnon(page)) - ttu_flags |= TTU_MIGRATION; + ttu_flags |= TTU_SPLIT_FREEZE; unmap_success = try_to_unmap(page, ttu_flags); VM_BUG_ON_PAGE(!unmap_success, page); @@ -2745,3 +2830,66 @@ static int __init split_huge_pages_debugfs(void) } late_initcall(split_huge_pages_debugfs); #endif + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +void set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, + struct page *page) +{ + struct vm_area_struct *vma = pvmw->vma; + struct mm_struct *mm = vma->vm_mm; + unsigned long address = pvmw->address; + pmd_t pmdval; + swp_entry_t entry; + pmd_t pmdswp; + + if (!(pvmw->pmd && !pvmw->pte)) + return; + + mmu_notifier_invalidate_range_start(mm, address, + address + HPAGE_PMD_SIZE); + + flush_cache_range(vma, address, address + HPAGE_PMD_SIZE); + pmdval = *pvmw->pmd; + pmdp_invalidate(vma, address, pvmw->pmd); + if (pmd_dirty(pmdval)) + set_page_dirty(page); + entry = make_migration_entry(page, pmd_write(pmdval)); + pmdswp = swp_entry_to_pmd(entry); + if (pmd_soft_dirty(pmdval)) + pmdswp = pmd_swp_mksoft_dirty(pmdswp); + set_pmd_at(mm, address, pvmw->pmd, pmdswp); + page_remove_rmap(page, true); + put_page(page); + + mmu_notifier_invalidate_range_end(mm, address, + address + HPAGE_PMD_SIZE); +} + +void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) +{ + struct vm_area_struct *vma = pvmw->vma; + struct mm_struct *mm = vma->vm_mm; + unsigned long address = pvmw->address; + unsigned long mmun_start = address & HPAGE_PMD_MASK; + pmd_t pmde; + swp_entry_t entry; + + if (!(pvmw->pmd && !pvmw->pte)) + return; + + entry = pmd_to_swp_entry(*pvmw->pmd); + get_page(new); + pmde = pmd_mkold(mk_huge_pmd(new, vma->vm_page_prot)); + if (pmd_swp_soft_dirty(*pvmw->pmd)) + pmde = pmd_mksoft_dirty(pmde); + if (is_write_migration_entry(entry)) + pmde = maybe_pmd_mkwrite(pmde, vma); + + flush_cache_range(vma, mmun_start, mmun_start + HPAGE_PMD_SIZE); + page_add_anon_rmap(new, vma, mmun_start, true); + set_pmd_at(mm, mmun_start, pvmw->pmd, pmde); + if (vma->vm_flags & VM_LOCKED) + mlock_vma_page(new); + update_mmu_cache_pmd(vma, address, pvmw->pmd); +} +#endif diff --git a/mm/interval_tree.c b/mm/interval_tree.c index f2c2492681bf14ae49456b8353965ca3c73002d1..b476643587966102e90813663b2039461d1dc679 100644 --- a/mm/interval_tree.c +++ b/mm/interval_tree.c @@ -28,7 +28,7 @@ INTERVAL_TREE_DEFINE(struct vm_area_struct, shared.rb, /* Insert node immediately after prev in the interval tree */ void vma_interval_tree_insert_after(struct vm_area_struct *node, struct vm_area_struct *prev, - struct rb_root *root) + struct rb_root_cached *root) { struct rb_node **link; struct vm_area_struct *parent; @@ -55,7 +55,7 @@ void vma_interval_tree_insert_after(struct vm_area_struct *node, node->shared.rb_subtree_last = last; rb_link_node(&node->shared.rb, &parent->shared.rb, link); - rb_insert_augmented(&node->shared.rb, root, + rb_insert_augmented(&node->shared.rb, &root->rb_root, &vma_interval_tree_augment); } @@ -74,7 +74,7 @@ INTERVAL_TREE_DEFINE(struct anon_vma_chain, rb, unsigned long, rb_subtree_last, static inline, __anon_vma_interval_tree) void anon_vma_interval_tree_insert(struct anon_vma_chain *node, - struct rb_root *root) + struct rb_root_cached *root) { #ifdef CONFIG_DEBUG_VM_RB node->cached_vma_start = avc_start_pgoff(node); @@ -84,13 +84,13 @@ void anon_vma_interval_tree_insert(struct anon_vma_chain *node, } void anon_vma_interval_tree_remove(struct anon_vma_chain *node, - struct rb_root *root) + struct rb_root_cached *root) { __anon_vma_interval_tree_remove(node, root); } struct anon_vma_chain * -anon_vma_interval_tree_iter_first(struct rb_root *root, +anon_vma_interval_tree_iter_first(struct rb_root_cached *root, unsigned long first, unsigned long last) { return __anon_vma_interval_tree_iter_first(root, first, last); diff --git a/mm/madvise.c b/mm/madvise.c index eea1c733286fba68fa7ed540aa77a1ea7f0c8f59..21261ff0466fb99d1254ee3927dd1730642d5356 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -355,7 +355,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, continue; } - page = vm_normal_page(vma, addr, ptent); + page = _vm_normal_page(vma, addr, ptent, true); if (!page) continue; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6532b219b22239a268783d399a7ffe0385ee4ccf..15af3da5af02f6acbccff3551a71461a4f4396d5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -119,6 +119,7 @@ static const char *const mem_cgroup_lru_names[] = { struct mem_cgroup_tree_per_node { struct rb_root rb_root; + struct rb_node *rb_rightmost; spinlock_t lock; }; @@ -386,6 +387,7 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, struct rb_node **p = &mctz->rb_root.rb_node; struct rb_node *parent = NULL; struct mem_cgroup_per_node *mz_node; + bool rightmost = true; if (mz->on_tree) return; @@ -397,8 +399,11 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, parent = *p; mz_node = rb_entry(parent, struct mem_cgroup_per_node, tree_node); - if (mz->usage_in_excess < mz_node->usage_in_excess) + if (mz->usage_in_excess < mz_node->usage_in_excess) { p = &(*p)->rb_left; + rightmost = false; + } + /* * We can't avoid mem cgroups that are over their soft * limit by the same amount @@ -406,6 +411,10 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, else if (mz->usage_in_excess >= mz_node->usage_in_excess) p = &(*p)->rb_right; } + + if (rightmost) + mctz->rb_rightmost = &mz->tree_node; + rb_link_node(&mz->tree_node, parent, p); rb_insert_color(&mz->tree_node, &mctz->rb_root); mz->on_tree = true; @@ -416,6 +425,10 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz, { if (!mz->on_tree) return; + + if (&mz->tree_node == mctz->rb_rightmost) + mctz->rb_rightmost = rb_prev(&mz->tree_node); + rb_erase(&mz->tree_node, &mctz->rb_root); mz->on_tree = false; } @@ -496,16 +509,15 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) static struct mem_cgroup_per_node * __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz) { - struct rb_node *rightmost = NULL; struct mem_cgroup_per_node *mz; retry: mz = NULL; - rightmost = rb_last(&mctz->rb_root); - if (!rightmost) + if (!mctz->rb_rightmost) goto done; /* Nothing to reclaim from */ - mz = rb_entry(rightmost, struct mem_cgroup_per_node, tree_node); + mz = rb_entry(mctz->rb_rightmost, + struct mem_cgroup_per_node, tree_node); /* * Remove the node now but someone else can add it back, * we will to add it back at the end of reclaim to its correct @@ -1792,6 +1804,9 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) } stock->nr_pages += nr_pages; + if (stock->nr_pages > CHARGE_BATCH) + drain_stock(stock); + local_irq_restore(flags); } @@ -4414,12 +4429,13 @@ enum mc_target_type { MC_TARGET_NONE = 0, MC_TARGET_PAGE, MC_TARGET_SWAP, + MC_TARGET_DEVICE, }; static struct page *mc_handle_present_pte(struct vm_area_struct *vma, unsigned long addr, pte_t ptent) { - struct page *page = vm_normal_page(vma, addr, ptent); + struct page *page = _vm_normal_page(vma, addr, ptent, true); if (!page || !page_mapped(page)) return NULL; @@ -4436,7 +4452,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma, return page; } -#ifdef CONFIG_SWAP +#if defined(CONFIG_SWAP) || defined(CONFIG_DEVICE_PRIVATE) static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, pte_t ptent, swp_entry_t *entry) { @@ -4445,6 +4461,23 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, if (!(mc.flags & MOVE_ANON) || non_swap_entry(ent)) return NULL; + + /* + * Handle MEMORY_DEVICE_PRIVATE which are ZONE_DEVICE page belonging to + * a device and because they are not accessible by CPU they are store + * as special swap entry in the CPU page table. + */ + if (is_device_private_entry(ent)) { + page = device_private_entry_to_page(ent); + /* + * MEMORY_DEVICE_PRIVATE means ZONE_DEVICE page and which have + * a refcount of 1 when free (unlike normal page) + */ + if (!page_ref_add_unless(page, 1, 1)) + return NULL; + return page; + } + /* * Because lookup_swap_cache() updates some statistics counter, * we call find_get_page() with swapper_space directly. @@ -4605,6 +4638,13 @@ static int mem_cgroup_move_account(struct page *page, * 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a * target for charge migration. if @target is not NULL, the entry is stored * in target->ent. + * 3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is MEMORY_DEVICE_PUBLIC + * or MEMORY_DEVICE_PRIVATE (so ZONE_DEVICE page and thus not on the lru). + * For now we such page is charge like a regular page would be as for all + * intent and purposes it is just special memory taking the place of a + * regular page. + * + * See Documentations/vm/hmm.txt and include/linux/hmm.h * * Called with pte lock held. */ @@ -4633,6 +4673,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, */ if (page->mem_cgroup == mc.from) { ret = MC_TARGET_PAGE; + if (is_device_private_page(page) || + is_device_public_page(page)) + ret = MC_TARGET_DEVICE; if (target) target->page = page; } @@ -4664,6 +4707,11 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma, struct page *page = NULL; enum mc_target_type ret = MC_TARGET_NONE; + if (unlikely(is_swap_pmd(pmd))) { + VM_BUG_ON(thp_migration_supported() && + !is_pmd_migration_entry(pmd)); + return ret; + } page = pmd_page(pmd); VM_BUG_ON_PAGE(!page || !PageHead(page), page); if (!(mc.flags & MOVE_ANON)) @@ -4695,6 +4743,11 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { + /* + * Note their can not be MC_TARGET_DEVICE for now as we do not + * support transparent huge page with MEMORY_DEVICE_PUBLIC or + * MEMORY_DEVICE_PRIVATE but this might change. + */ if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE) mc.precharge += HPAGE_PMD_NR; spin_unlock(ptl); @@ -4910,6 +4963,14 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, putback_lru_page(page); } put_page(page); + } else if (target_type == MC_TARGET_DEVICE) { + page = target.page; + if (!mem_cgroup_move_account(page, true, + mc.from, mc.to)) { + mc.precharge -= HPAGE_PMD_NR; + mc.moved_charge += HPAGE_PMD_NR; + } + put_page(page); } spin_unlock(ptl); return 0; @@ -4921,12 +4982,16 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; addr += PAGE_SIZE) { pte_t ptent = *(pte++); + bool device = false; swp_entry_t ent; if (!mc.precharge) break; switch (get_mctgt_type(vma, addr, ptent, &target)) { + case MC_TARGET_DEVICE: + device = true; + /* fall through */ case MC_TARGET_PAGE: page = target.page; /* @@ -4937,7 +5002,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, */ if (PageTransCompound(page)) goto put; - if (isolate_lru_page(page)) + if (!device && isolate_lru_page(page)) goto put; if (!mem_cgroup_move_account(page, false, mc.from, mc.to)) { @@ -4945,7 +5010,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, /* we uncharge from mc.from later. */ mc.moved_charge++; } - putback_lru_page(page); + if (!device) + putback_lru_page(page); put: /* get_mctgt_type() gets the page */ put_page(page); break; @@ -5535,48 +5601,102 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg, cancel_charge(memcg, nr_pages); } -static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, - unsigned long nr_anon, unsigned long nr_file, - unsigned long nr_kmem, unsigned long nr_huge, - unsigned long nr_shmem, struct page *dummy_page) +struct uncharge_gather { + struct mem_cgroup *memcg; + unsigned long pgpgout; + unsigned long nr_anon; + unsigned long nr_file; + unsigned long nr_kmem; + unsigned long nr_huge; + unsigned long nr_shmem; + struct page *dummy_page; +}; + +static inline void uncharge_gather_clear(struct uncharge_gather *ug) +{ + memset(ug, 0, sizeof(*ug)); +} + +static void uncharge_batch(const struct uncharge_gather *ug) { - unsigned long nr_pages = nr_anon + nr_file + nr_kmem; + unsigned long nr_pages = ug->nr_anon + ug->nr_file + ug->nr_kmem; unsigned long flags; - if (!mem_cgroup_is_root(memcg)) { - page_counter_uncharge(&memcg->memory, nr_pages); + if (!mem_cgroup_is_root(ug->memcg)) { + page_counter_uncharge(&ug->memcg->memory, nr_pages); if (do_memsw_account()) - page_counter_uncharge(&memcg->memsw, nr_pages); - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && nr_kmem) - page_counter_uncharge(&memcg->kmem, nr_kmem); - memcg_oom_recover(memcg); + page_counter_uncharge(&ug->memcg->memsw, nr_pages); + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem) + page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem); + memcg_oom_recover(ug->memcg); } local_irq_save(flags); - __this_cpu_sub(memcg->stat->count[MEMCG_RSS], nr_anon); - __this_cpu_sub(memcg->stat->count[MEMCG_CACHE], nr_file); - __this_cpu_sub(memcg->stat->count[MEMCG_RSS_HUGE], nr_huge); - __this_cpu_sub(memcg->stat->count[NR_SHMEM], nr_shmem); - __this_cpu_add(memcg->stat->events[PGPGOUT], pgpgout); - __this_cpu_add(memcg->stat->nr_page_events, nr_pages); - memcg_check_events(memcg, dummy_page); + __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS], ug->nr_anon); + __this_cpu_sub(ug->memcg->stat->count[MEMCG_CACHE], ug->nr_file); + __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS_HUGE], ug->nr_huge); + __this_cpu_sub(ug->memcg->stat->count[NR_SHMEM], ug->nr_shmem); + __this_cpu_add(ug->memcg->stat->events[PGPGOUT], ug->pgpgout); + __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages); + memcg_check_events(ug->memcg, ug->dummy_page); local_irq_restore(flags); - if (!mem_cgroup_is_root(memcg)) - css_put_many(&memcg->css, nr_pages); + if (!mem_cgroup_is_root(ug->memcg)) + css_put_many(&ug->memcg->css, nr_pages); +} + +static void uncharge_page(struct page *page, struct uncharge_gather *ug) +{ + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page); + + if (!page->mem_cgroup) + return; + + /* + * Nobody should be changing or seriously looking at + * page->mem_cgroup at this point, we have fully + * exclusive access to the page. + */ + + if (ug->memcg != page->mem_cgroup) { + if (ug->memcg) { + uncharge_batch(ug); + uncharge_gather_clear(ug); + } + ug->memcg = page->mem_cgroup; + } + + if (!PageKmemcg(page)) { + unsigned int nr_pages = 1; + + if (PageTransHuge(page)) { + nr_pages <<= compound_order(page); + ug->nr_huge += nr_pages; + } + if (PageAnon(page)) + ug->nr_anon += nr_pages; + else { + ug->nr_file += nr_pages; + if (PageSwapBacked(page)) + ug->nr_shmem += nr_pages; + } + ug->pgpgout++; + } else { + ug->nr_kmem += 1 << compound_order(page); + __ClearPageKmemcg(page); + } + + ug->dummy_page = page; + page->mem_cgroup = NULL; } static void uncharge_list(struct list_head *page_list) { - struct mem_cgroup *memcg = NULL; - unsigned long nr_shmem = 0; - unsigned long nr_anon = 0; - unsigned long nr_file = 0; - unsigned long nr_huge = 0; - unsigned long nr_kmem = 0; - unsigned long pgpgout = 0; + struct uncharge_gather ug; struct list_head *next; - struct page *page; + + uncharge_gather_clear(&ug); /* * Note that the list can be a single page->lru; hence the @@ -5584,57 +5704,16 @@ static void uncharge_list(struct list_head *page_list) */ next = page_list->next; do { + struct page *page; + page = list_entry(next, struct page, lru); next = page->lru.next; - VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page); - - if (!page->mem_cgroup) - continue; - - /* - * Nobody should be changing or seriously looking at - * page->mem_cgroup at this point, we have fully - * exclusive access to the page. - */ - - if (memcg != page->mem_cgroup) { - if (memcg) { - uncharge_batch(memcg, pgpgout, nr_anon, nr_file, - nr_kmem, nr_huge, nr_shmem, page); - pgpgout = nr_anon = nr_file = nr_kmem = 0; - nr_huge = nr_shmem = 0; - } - memcg = page->mem_cgroup; - } - - if (!PageKmemcg(page)) { - unsigned int nr_pages = 1; - - if (PageTransHuge(page)) { - nr_pages <<= compound_order(page); - nr_huge += nr_pages; - } - if (PageAnon(page)) - nr_anon += nr_pages; - else { - nr_file += nr_pages; - if (PageSwapBacked(page)) - nr_shmem += nr_pages; - } - pgpgout++; - } else { - nr_kmem += 1 << compound_order(page); - __ClearPageKmemcg(page); - } - - page->mem_cgroup = NULL; + uncharge_page(page, &ug); } while (next != page_list); - if (memcg) - uncharge_batch(memcg, pgpgout, nr_anon, nr_file, - nr_kmem, nr_huge, nr_shmem, page); + if (ug.memcg) + uncharge_batch(&ug); } /** @@ -5646,6 +5725,8 @@ static void uncharge_list(struct list_head *page_list) */ void mem_cgroup_uncharge(struct page *page) { + struct uncharge_gather ug; + if (mem_cgroup_disabled()) return; @@ -5653,8 +5734,9 @@ void mem_cgroup_uncharge(struct page *page) if (!page->mem_cgroup) return; - INIT_LIST_HEAD(&page->lru); - uncharge_list(&page->lru); + uncharge_gather_clear(&ug); + uncharge_page(page, &ug); + uncharge_batch(&ug); } /** @@ -5819,8 +5901,7 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages) this_cpu_sub(memcg->stat->count[MEMCG_SOCK], nr_pages); - page_counter_uncharge(&memcg->memory, nr_pages); - css_put_many(&memcg->css, nr_pages); + refill_stock(memcg, nr_pages); } static int __init cgroup_memory(char *s) @@ -5876,6 +5957,7 @@ static int __init mem_cgroup_init(void) node_online(node) ? node : NUMA_NO_NODE); rtpn->rb_root = RB_ROOT; + rtpn->rb_rightmost = NULL; spin_lock_init(&rtpn->lock); soft_limit_tree.rb_tree_per_node[node] = rtpn; } diff --git a/mm/memory.c b/mm/memory.c index 13ee83b4387872b325414bf46b8f44710c2230f2..ec4e15494901665f99329f2ad094cd3ed3ceed2e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -817,8 +818,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr, #else # define HAVE_PTE_SPECIAL 0 #endif -struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, - pte_t pte) +struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr, + pte_t pte, bool with_public_device) { unsigned long pfn = pte_pfn(pte); @@ -829,8 +830,31 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, return vma->vm_ops->find_special_page(vma, addr); if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) return NULL; - if (!is_zero_pfn(pfn)) - print_bad_pte(vma, addr, pte, NULL); + if (is_zero_pfn(pfn)) + return NULL; + + /* + * Device public pages are special pages (they are ZONE_DEVICE + * pages but different from persistent memory). They behave + * allmost like normal pages. The difference is that they are + * not on the lru and thus should never be involve with any- + * thing that involve lru manipulation (mlock, numa balancing, + * ...). + * + * This is why we still want to return NULL for such page from + * vm_normal_page() so that we do not have to special case all + * call site of vm_normal_page(). + */ + if (likely(pfn < highest_memmap_pfn)) { + struct page *page = pfn_to_page(pfn); + + if (is_device_public_page(page)) { + if (with_public_device) + return page; + return NULL; + } + } + print_bad_pte(vma, addr, pte, NULL); return NULL; } @@ -956,6 +980,35 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte = pte_swp_mksoft_dirty(pte); set_pte_at(src_mm, addr, src_pte, pte); } + } else if (is_device_private_entry(entry)) { + page = device_private_entry_to_page(entry); + + /* + * Update rss count even for unaddressable pages, as + * they should treated just like normal pages in this + * respect. + * + * We will likely want to have some new rss counters + * for unaddressable pages, at some point. But for now + * keep things as they are. + */ + get_page(page); + rss[mm_counter(page)]++; + page_dup_rmap(page, false); + + /* + * We do not preserve soft-dirty information, because so + * far, checkpoint/restore is the only feature that + * requires that. And checkpoint/restore does not work + * when a device driver is involved (you cannot easily + * save and restore device driver state). + */ + if (is_write_device_private_entry(entry) && + is_cow_mapping(vm_flags)) { + make_device_private_entry_read(&entry); + pte = swp_entry_to_pte(entry); + set_pte_at(src_mm, addr, src_pte, pte); + } } goto out_set_pte; } @@ -982,6 +1035,19 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, get_page(page); page_dup_rmap(page, false); rss[mm_counter(page)]++; + } else if (pte_devmap(pte)) { + page = pte_page(pte); + + /* + * Cache coherent device memory behave like regular page and + * not like persistent memory page. For more informations see + * MEMORY_DEVICE_CACHE_COHERENT in memory_hotplug.h + */ + if (is_device_public_page(page)) { + get_page(page); + page_dup_rmap(page, false); + rss[mm_counter(page)]++; + } } out_set_pte: @@ -1065,7 +1131,8 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src src_pmd = pmd_offset(src_pud, addr); do { next = pmd_addr_end(addr, end); - if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) { + if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd) + || pmd_devmap(*src_pmd)) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, vma); err = copy_huge_pmd(dst_mm, src_mm, @@ -1236,7 +1303,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, if (pte_present(ptent)) { struct page *page; - page = vm_normal_page(vma, addr, ptent); + page = _vm_normal_page(vma, addr, ptent, true); if (unlikely(details) && page) { /* * unmap_shared_mapping_pages() wants to @@ -1273,6 +1340,29 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, } continue; } + + entry = pte_to_swp_entry(ptent); + if (non_swap_entry(entry) && is_device_private_entry(entry)) { + struct page *page = device_private_entry_to_page(entry); + + if (unlikely(details && details->check_mapping)) { + /* + * unmap_shared_mapping_pages() wants to + * invalidate cache without truncating: + * unmap shared but keep private pages. + */ + if (details->check_mapping != + page_rmapping(page)) + continue; + } + + pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); + rss[mm_counter(page)]--; + page_remove_rmap(page, false); + put_page(page); + continue; + } + /* If details->check_mapping, we leave swap entries. */ if (unlikely(details)) continue; @@ -1326,7 +1416,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); - if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { + if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) { VM_BUG_ON_VMA(vma_is_anonymous(vma) && !rwsem_is_locked(&tlb->mm->mmap_sem), vma); @@ -2671,7 +2761,7 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma, zap_page_range_single(vma, start_addr, end_addr - start_addr, details); } -static inline void unmap_mapping_range_tree(struct rb_root *root, +static inline void unmap_mapping_range_tree(struct rb_root_cached *root, struct zap_details *details) { struct vm_area_struct *vma; @@ -2735,7 +2825,7 @@ void unmap_mapping_range(struct address_space *mapping, details.last_index = ULONG_MAX; i_mmap_lock_write(mapping); - if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap))) + if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) unmap_mapping_range_tree(&mapping->i_mmap, &details); i_mmap_unlock_write(mapping); } @@ -2775,6 +2865,14 @@ int do_swap_page(struct vm_fault *vmf) if (is_migration_entry(entry)) { migration_entry_wait(vma->vm_mm, vmf->pmd, vmf->address); + } else if (is_device_private_entry(entry)) { + /* + * For un-addressable device memory we call the pgmap + * fault handler callback. The callback must migrate + * the page back to some CPU accessible page. + */ + ret = device_private_entry_fault(vma, vmf->address, entry, + vmf->flags, vmf->pmd); } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; } else { @@ -3863,6 +3961,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, .pgoff = linear_page_index(vma, address), .gfp_mask = __get_fault_gfp_mask(vma), }; + unsigned int dirty = flags & FAULT_FLAG_WRITE; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; p4d_t *p4d; @@ -3885,7 +3984,6 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, barrier(); if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) { - unsigned int dirty = flags & FAULT_FLAG_WRITE; /* NUMA case for anonymous PUDs would go here */ @@ -3911,12 +4009,18 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, pmd_t orig_pmd = *vmf.pmd; barrier(); + if (unlikely(is_swap_pmd(orig_pmd))) { + VM_BUG_ON(thp_migration_supported() && + !is_pmd_migration_entry(orig_pmd)); + if (is_pmd_migration_entry(orig_pmd)) + pmd_migration_entry_wait(mm, vmf.pmd); + return 0; + } if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) { if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) return do_huge_pmd_numa_page(&vmf, orig_pmd); - if ((vmf.flags & FAULT_FLAG_WRITE) && - !pmd_write(orig_pmd)) { + if (dirty && !pmd_write(orig_pmd)) { ret = wp_huge_pmd(&vmf, orig_pmd); if (!(ret & VM_FAULT_FALLBACK)) return ret; @@ -3949,6 +4053,11 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, /* do counter updates before entering really critical section. */ check_sync_rss_stat(current); + if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + flags & FAULT_FLAG_INSTRUCTION, + flags & FAULT_FLAG_REMOTE)) + return VM_FAULT_SIGSEGV; + /* * Enable the memcg OOM handling for faults triggered in user * space. Kernel faults are handled more gracefully. @@ -3956,11 +4065,6 @@ int handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (flags & FAULT_FLAG_USER) mem_cgroup_oom_enable(); - if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, - flags & FAULT_FLAG_INSTRUCTION, - flags & FAULT_FLAG_REMOTE)) - return VM_FAULT_SIGSEGV; - if (unlikely(is_vm_hugetlb_page(vma))) ret = hugetlb_fault(vma->vm_mm, vma, address, flags); else diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 73bf17df68996b0c0ae3e8bdc94def3f4b566f73..e882cb6da99425bad30e4017c5feeadacba2e8ce 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -99,7 +99,7 @@ void mem_hotplug_done(void) /* add this memory to iomem resource */ static struct resource *register_memory_resource(u64 start, u64 size) { - struct resource *res; + struct resource *res, *conflict; res = kzalloc(sizeof(struct resource), GFP_KERNEL); if (!res) return ERR_PTR(-ENOMEM); @@ -108,7 +108,13 @@ static struct resource *register_memory_resource(u64 start, u64 size) res->start = start; res->end = start + size - 1; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - if (request_resource(&iomem_resource, res) < 0) { + conflict = request_resource_conflict(&iomem_resource, res); + if (conflict) { + if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) { + pr_debug("Device unaddressable memory block " + "memory hotplug at %#010llx !\n", + (unsigned long long)start); + } pr_debug("System RAM resource %pR cannot be added\n", res); kfree(res); return ERR_PTR(-EEXIST); @@ -1380,7 +1386,9 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (isolate_huge_page(page, &source)) move_pages -= 1 << compound_order(head); continue; - } + } else if (thp_migration_supported() && PageTransHuge(page)) + pfn = page_to_pfn(compound_head(page)) + + hpage_nr_pages(page) - 1; if (!get_page_unless_zero(page)) continue; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 618ab125228baec0810146a0638ce80ce4d50284..006ba625c0b8d4edb6b3ed2b20ce307c254b42be 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -97,6 +97,7 @@ #include #include #include +#include #include #include @@ -411,6 +412,64 @@ struct queue_pages { struct vm_area_struct *prev; }; +/* + * Check if the page's nid is in qp->nmask. + * + * If MPOL_MF_INVERT is set in qp->flags, check if the nid is + * in the invert of qp->nmask. + */ +static inline bool queue_pages_required(struct page *page, + struct queue_pages *qp) +{ + int nid = page_to_nid(page); + unsigned long flags = qp->flags; + + return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); +} + +static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, + unsigned long end, struct mm_walk *walk) +{ + int ret = 0; + struct page *page; + struct queue_pages *qp = walk->private; + unsigned long flags; + + if (unlikely(is_pmd_migration_entry(*pmd))) { + ret = 1; + goto unlock; + } + page = pmd_page(*pmd); + if (is_huge_zero_page(page)) { + spin_unlock(ptl); + __split_huge_pmd(walk->vma, pmd, addr, false, NULL); + goto out; + } + if (!thp_migration_supported()) { + get_page(page); + spin_unlock(ptl); + lock_page(page); + ret = split_huge_page(page); + unlock_page(page); + put_page(page); + goto out; + } + if (!queue_pages_required(page, qp)) { + ret = 1; + goto unlock; + } + + ret = 1; + flags = qp->flags; + /* go to thp migration */ + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + migrate_page_add(page, qp->pagelist, flags); +unlock: + spin_unlock(ptl); +out: + return ret; +} + /* * Scan through pages checking if pages follow certain conditions, * and move them to the pagelist if they do. @@ -422,30 +481,15 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, struct page *page; struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - int nid, ret; + int ret; pte_t *pte; spinlock_t *ptl; - if (pmd_trans_huge(*pmd)) { - ptl = pmd_lock(walk->mm, pmd); - if (pmd_trans_huge(*pmd)) { - page = pmd_page(*pmd); - if (is_huge_zero_page(page)) { - spin_unlock(ptl); - __split_huge_pmd(vma, pmd, addr, false, NULL); - } else { - get_page(page); - spin_unlock(ptl); - lock_page(page); - ret = split_huge_page(page); - unlock_page(page); - put_page(page); - if (ret) - return 0; - } - } else { - spin_unlock(ptl); - } + ptl = pmd_trans_huge_lock(pmd, vma); + if (ptl) { + ret = queue_pages_pmd(pmd, ptl, addr, end, walk); + if (ret) + return 0; } if (pmd_trans_unstable(pmd)) @@ -464,10 +508,9 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, */ if (PageReserved(page)) continue; - nid = page_to_nid(page); - if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT)) + if (!queue_pages_required(page, qp)) continue; - if (PageTransCompound(page)) { + if (PageTransCompound(page) && !thp_migration_supported()) { get_page(page); pte_unmap_unlock(pte, ptl); lock_page(page); @@ -497,7 +540,6 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, #ifdef CONFIG_HUGETLB_PAGE struct queue_pages *qp = walk->private; unsigned long flags = qp->flags; - int nid; struct page *page; spinlock_t *ptl; pte_t entry; @@ -507,8 +549,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, if (!pte_present(entry)) goto unlock; page = pte_page(entry); - nid = page_to_nid(page); - if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT)) + if (!queue_pages_required(page, qp)) goto unlock; /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */ if (flags & (MPOL_MF_MOVE_ALL) || @@ -881,19 +922,21 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask, #ifdef CONFIG_MIGRATION /* - * page migration + * page migration, thp tail pages can be passed. */ static void migrate_page_add(struct page *page, struct list_head *pagelist, unsigned long flags) { + struct page *head = compound_head(page); /* * Avoid migrating a page that is shared with others. */ - if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) { - if (!isolate_lru_page(page)) { - list_add_tail(&page->lru, pagelist); - inc_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(head) == 1) { + if (!isolate_lru_page(head)) { + list_add_tail(&head->lru, pagelist); + mod_node_page_state(page_pgdat(head), + NR_ISOLATED_ANON + page_is_file_cache(head), + hpage_nr_pages(head)); } } } @@ -903,7 +946,17 @@ static struct page *new_node_page(struct page *page, unsigned long node, int **x if (PageHuge(page)) return alloc_huge_page_node(page_hstate(compound_head(page)), node); - else + else if (thp_migration_supported() && PageTransHuge(page)) { + struct page *thp; + + thp = alloc_pages_node(node, + (GFP_TRANSHUGE | __GFP_THISNODE), + HPAGE_PMD_ORDER); + if (!thp) + return NULL; + prep_transhuge_page(thp); + return thp; + } else return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0); } @@ -1069,6 +1122,15 @@ static struct page *new_page(struct page *page, unsigned long start, int **x) if (PageHuge(page)) { BUG_ON(!vma); return alloc_huge_page_noerr(vma, address, 1); + } else if (thp_migration_supported() && PageTransHuge(page)) { + struct page *thp; + + thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address, + HPAGE_PMD_ORDER); + if (!thp) + return NULL; + prep_transhuge_page(thp); + return thp; } /* * if !vma, alloc_page_vma() will use task or system default policy @@ -1683,8 +1745,7 @@ unsigned int mempolicy_slab_node(void) * node in pol->v.nodes (starting from n=0), wrapping around if n exceeds the * number of present nodes. */ -static unsigned offset_il_node(struct mempolicy *pol, - struct vm_area_struct *vma, unsigned long n) +static unsigned offset_il_node(struct mempolicy *pol, unsigned long n) { unsigned nnodes = nodes_weight(pol->v.nodes); unsigned target; @@ -1717,7 +1778,7 @@ static inline unsigned interleave_nid(struct mempolicy *pol, BUG_ON(shift < PAGE_SHIFT); off = vma->vm_pgoff >> (shift - PAGE_SHIFT); off += (addr - vma->vm_start) >> shift; - return offset_il_node(pol, vma, off); + return offset_il_node(pol, off); } else return interleave_nodes(pol); } @@ -2172,20 +2233,15 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long int polnid = -1; int ret = -1; - BUG_ON(!vma); - pol = get_vma_policy(vma, addr); if (!(pol->flags & MPOL_F_MOF)) goto out; switch (pol->mode) { case MPOL_INTERLEAVE: - BUG_ON(addr >= vma->vm_end); - BUG_ON(addr < vma->vm_start); - pgoff = vma->vm_pgoff; pgoff += (addr - vma->vm_start) >> PAGE_SHIFT; - polnid = offset_il_node(pol, vma, pgoff); + polnid = offset_il_node(pol, pgoff); break; case MPOL_PREFERRED: diff --git a/mm/migrate.c b/mm/migrate.c index e84eeb4e43566c7b1ee85e3759b1b60b72c1c532..6954c1435833133f910a08a9cd8e0e1516084296 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -36,6 +36,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -185,8 +188,8 @@ void putback_movable_pages(struct list_head *l) unlock_page(page); put_page(page); } else { - dec_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + + page_is_file_cache(page), -hpage_nr_pages(page)); putback_lru_page(page); } } @@ -216,6 +219,15 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, new = page - pvmw.page->index + linear_page_index(vma, pvmw.address); +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION + /* PMD-mapped THP migration entry */ + if (!pvmw.pte) { + VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page); + remove_migration_pmd(&pvmw, new); + continue; + } +#endif + get_page(new); pte = pte_mkold(mk_pte(new, READ_ONCE(vma->vm_page_prot))); if (pte_swp_soft_dirty(*pvmw.pte)) @@ -228,7 +240,17 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, if (is_write_migration_entry(entry)) pte = maybe_mkwrite(pte, vma); - flush_dcache_page(new); + if (unlikely(is_zone_device_page(new))) { + if (is_device_private_page(new)) { + entry = make_device_private_entry(new, pte_write(pte)); + pte = swp_entry_to_pte(entry); + } else if (is_device_public_page(new)) { + pte = pte_mkdevmap(pte); + flush_dcache_page(new); + } + } else + flush_dcache_page(new); + #ifdef CONFIG_HUGETLB_PAGE if (PageHuge(new)) { pte = pte_mkhuge(pte); @@ -330,6 +352,27 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, __migration_entry_wait(mm, pte, ptl); } +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) +{ + spinlock_t *ptl; + struct page *page; + + ptl = pmd_lock(mm, pmd); + if (!is_pmd_migration_entry(*pmd)) + goto unlock; + page = migration_entry_to_page(pmd_to_swp_entry(*pmd)); + if (!get_page_unless_zero(page)) + goto unlock; + spin_unlock(ptl); + wait_on_page_locked(page); + put_page(page); + return; +unlock: + spin_unlock(ptl); +} +#endif + #ifdef CONFIG_BLOCK /* Returns true if all buffers are successfully locked */ static bool buffer_migrate_lock_buffers(struct buffer_head *head, @@ -398,6 +441,13 @@ int migrate_page_move_mapping(struct address_space *mapping, int expected_count = 1 + extra_count; void **pslot; + /* + * Device public or private pages have an extra refcount as they are + * ZONE_DEVICE pages. + */ + expected_count += is_device_private_page(page); + expected_count += is_device_public_page(page); + if (!mapping) { /* Anonymous page without mapping */ if (page_count(page) != expected_count) @@ -604,15 +654,10 @@ static void copy_huge_page(struct page *dst, struct page *src) /* * Copy the page to its new location */ -void migrate_page_copy(struct page *newpage, struct page *page) +void migrate_page_states(struct page *newpage, struct page *page) { int cpupid; - if (PageHuge(page) || PageTransHuge(page)) - copy_huge_page(newpage, page); - else - copy_highpage(newpage, page); - if (PageError(page)) SetPageError(newpage); if (PageReferenced(page)) @@ -666,6 +711,17 @@ void migrate_page_copy(struct page *newpage, struct page *page) mem_cgroup_migrate(page, newpage); } +EXPORT_SYMBOL(migrate_page_states); + +void migrate_page_copy(struct page *newpage, struct page *page) +{ + if (PageHuge(page) || PageTransHuge(page)) + copy_huge_page(newpage, page); + else + copy_highpage(newpage, page); + + migrate_page_states(newpage, page); +} EXPORT_SYMBOL(migrate_page_copy); /************************************************************ @@ -691,7 +747,10 @@ int migrate_page(struct address_space *mapping, if (rc != MIGRATEPAGE_SUCCESS) return rc; - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); return MIGRATEPAGE_SUCCESS; } EXPORT_SYMBOL(migrate_page); @@ -741,12 +800,15 @@ int buffer_migrate_page(struct address_space *mapping, SetPagePrivate(newpage); - migrate_page_copy(newpage, page); + if (mode != MIGRATE_SYNC_NO_COPY) + migrate_page_copy(newpage, page); + else + migrate_page_states(newpage, page); bh = head; do { unlock_buffer(bh); - put_bh(bh); + put_bh(bh); bh = bh->b_this_page; } while (bh != head); @@ -805,8 +867,13 @@ static int fallback_migrate_page(struct address_space *mapping, { if (PageDirty(page)) { /* Only writeback pages in full synchronous migration */ - if (mode != MIGRATE_SYNC) + switch (mode) { + case MIGRATE_SYNC: + case MIGRATE_SYNC_NO_COPY: + break; + default: return -EBUSY; + } return writeout(mapping, page); } @@ -943,7 +1010,11 @@ static int __unmap_and_move(struct page *page, struct page *newpage, * the retry loop is too short and in the sync-light case, * the overhead of stalling is too much */ - if (mode != MIGRATE_SYNC) { + switch (mode) { + case MIGRATE_SYNC: + case MIGRATE_SYNC_NO_COPY: + break; + default: rc = -EBUSY; goto out_unlock; } @@ -1088,7 +1159,7 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, goto out; } - if (unlikely(PageTransHuge(page))) { + if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) { lock_page(page); rc = split_huge_page(page); unlock_page(page); @@ -1116,8 +1187,8 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, * as __PageMovable */ if (likely(!__PageMovable(page))) - dec_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + + page_is_file_cache(page), -hpage_nr_pages(page)); } /* @@ -1213,8 +1284,15 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, return -ENOMEM; if (!trylock_page(hpage)) { - if (!force || mode != MIGRATE_SYNC) + if (!force) + goto out; + switch (mode) { + case MIGRATE_SYNC: + case MIGRATE_SYNC_NO_COPY: + break; + default: goto out; + } lock_page(hpage); } @@ -1391,7 +1469,17 @@ static struct page *new_page_node(struct page *p, unsigned long private, if (PageHuge(p)) return alloc_huge_page_node(page_hstate(compound_head(p)), pm->node); - else + else if (thp_migration_supported() && PageTransHuge(p)) { + struct page *thp; + + thp = alloc_pages_node(pm->node, + (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM, + HPAGE_PMD_ORDER); + if (!thp) + return NULL; + prep_transhuge_page(thp); + return thp; + } else return __alloc_pages_node(pm->node, GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0); } @@ -1418,6 +1506,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm, for (pp = pm; pp->node != MAX_NUMNODES; pp++) { struct vm_area_struct *vma; struct page *page; + struct page *head; + unsigned int follflags; err = -EFAULT; vma = find_vma(mm, pp->addr); @@ -1425,8 +1515,10 @@ static int do_move_page_to_node_array(struct mm_struct *mm, goto set_status; /* FOLL_DUMP to ignore special (like zero) pages */ - page = follow_page(vma, pp->addr, - FOLL_GET | FOLL_SPLIT | FOLL_DUMP); + follflags = FOLL_GET | FOLL_DUMP; + if (!thp_migration_supported()) + follflags |= FOLL_SPLIT; + page = follow_page(vma, pp->addr, follflags); err = PTR_ERR(page); if (IS_ERR(page)) @@ -1436,7 +1528,6 @@ static int do_move_page_to_node_array(struct mm_struct *mm, if (!page) goto set_status; - pp->page = page; err = page_to_nid(page); if (err == pp->node) @@ -1451,16 +1542,22 @@ static int do_move_page_to_node_array(struct mm_struct *mm, goto put_and_set; if (PageHuge(page)) { - if (PageHead(page)) + if (PageHead(page)) { isolate_huge_page(page, &pagelist); + err = 0; + pp->page = page; + } goto put_and_set; } - err = isolate_lru_page(page); + pp->page = compound_head(page); + head = compound_head(page); + err = isolate_lru_page(head); if (!err) { - list_add_tail(&page->lru, &pagelist); - inc_node_page_state(page, NR_ISOLATED_ANON + - page_is_file_cache(page)); + list_add_tail(&head->lru, &pagelist); + mod_node_page_state(page_pgdat(head), + NR_ISOLATED_ANON + page_is_file_cache(head), + hpage_nr_pages(head)); } put_and_set: /* @@ -2029,3 +2126,859 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_NUMA */ + +#if defined(CONFIG_MIGRATE_VMA_HELPER) +struct migrate_vma { + struct vm_area_struct *vma; + unsigned long *dst; + unsigned long *src; + unsigned long cpages; + unsigned long npages; + unsigned long start; + unsigned long end; +}; + +static int migrate_vma_collect_hole(unsigned long start, + unsigned long end, + struct mm_walk *walk) +{ + struct migrate_vma *migrate = walk->private; + unsigned long addr; + + for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { + migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE; + migrate->dst[migrate->npages] = 0; + migrate->cpages++; + } + + return 0; +} + +static int migrate_vma_collect_skip(unsigned long start, + unsigned long end, + struct mm_walk *walk) +{ + struct migrate_vma *migrate = walk->private; + unsigned long addr; + + for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { + migrate->dst[migrate->npages] = 0; + migrate->src[migrate->npages++] = 0; + } + + return 0; +} + +static int migrate_vma_collect_pmd(pmd_t *pmdp, + unsigned long start, + unsigned long end, + struct mm_walk *walk) +{ + struct migrate_vma *migrate = walk->private; + struct vm_area_struct *vma = walk->vma; + struct mm_struct *mm = vma->vm_mm; + unsigned long addr = start, unmapped = 0; + spinlock_t *ptl; + pte_t *ptep; + +again: + if (pmd_none(*pmdp)) + return migrate_vma_collect_hole(start, end, walk); + + if (pmd_trans_huge(*pmdp)) { + struct page *page; + + ptl = pmd_lock(mm, pmdp); + if (unlikely(!pmd_trans_huge(*pmdp))) { + spin_unlock(ptl); + goto again; + } + + page = pmd_page(*pmdp); + if (is_huge_zero_page(page)) { + spin_unlock(ptl); + split_huge_pmd(vma, pmdp, addr); + if (pmd_trans_unstable(pmdp)) + return migrate_vma_collect_skip(start, end, + walk); + } else { + int ret; + + get_page(page); + spin_unlock(ptl); + if (unlikely(!trylock_page(page))) + return migrate_vma_collect_skip(start, end, + walk); + ret = split_huge_page(page); + unlock_page(page); + put_page(page); + if (ret) + return migrate_vma_collect_skip(start, end, + walk); + if (pmd_none(*pmdp)) + return migrate_vma_collect_hole(start, end, + walk); + } + } + + if (unlikely(pmd_bad(*pmdp))) + return migrate_vma_collect_skip(start, end, walk); + + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + arch_enter_lazy_mmu_mode(); + + for (; addr < end; addr += PAGE_SIZE, ptep++) { + unsigned long mpfn, pfn; + struct page *page; + swp_entry_t entry; + pte_t pte; + + pte = *ptep; + pfn = pte_pfn(pte); + + if (pte_none(pte)) { + mpfn = MIGRATE_PFN_MIGRATE; + migrate->cpages++; + pfn = 0; + goto next; + } + + if (!pte_present(pte)) { + mpfn = pfn = 0; + + /* + * Only care about unaddressable device page special + * page table entry. Other special swap entries are not + * migratable, and we ignore regular swapped page. + */ + entry = pte_to_swp_entry(pte); + if (!is_device_private_entry(entry)) + goto next; + + page = device_private_entry_to_page(entry); + mpfn = migrate_pfn(page_to_pfn(page))| + MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE; + if (is_write_device_private_entry(entry)) + mpfn |= MIGRATE_PFN_WRITE; + } else { + if (is_zero_pfn(pfn)) { + mpfn = MIGRATE_PFN_MIGRATE; + migrate->cpages++; + pfn = 0; + goto next; + } + page = _vm_normal_page(migrate->vma, addr, pte, true); + mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE; + mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0; + } + + /* FIXME support THP */ + if (!page || !page->mapping || PageTransCompound(page)) { + mpfn = pfn = 0; + goto next; + } + pfn = page_to_pfn(page); + + /* + * By getting a reference on the page we pin it and that blocks + * any kind of migration. Side effect is that it "freezes" the + * pte. + * + * We drop this reference after isolating the page from the lru + * for non device page (device page are not on the lru and thus + * can't be dropped from it). + */ + get_page(page); + migrate->cpages++; + + /* + * Optimize for the common case where page is only mapped once + * in one process. If we can lock the page, then we can safely + * set up a special migration page table entry now. + */ + if (trylock_page(page)) { + pte_t swp_pte; + + mpfn |= MIGRATE_PFN_LOCKED; + ptep_get_and_clear(mm, addr, ptep); + + /* Setup special migration page table entry */ + entry = make_migration_entry(page, pte_write(pte)); + swp_pte = swp_entry_to_pte(entry); + if (pte_soft_dirty(pte)) + swp_pte = pte_swp_mksoft_dirty(swp_pte); + set_pte_at(mm, addr, ptep, swp_pte); + + /* + * This is like regular unmap: we remove the rmap and + * drop page refcount. Page won't be freed, as we took + * a reference just above. + */ + page_remove_rmap(page, false); + put_page(page); + + if (pte_present(pte)) + unmapped++; + } + +next: + migrate->dst[migrate->npages] = 0; + migrate->src[migrate->npages++] = mpfn; + } + arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(ptep - 1, ptl); + + /* Only flush the TLB if we actually modified any entries */ + if (unmapped) + flush_tlb_range(walk->vma, start, end); + + return 0; +} + +/* + * migrate_vma_collect() - collect pages over a range of virtual addresses + * @migrate: migrate struct containing all migration information + * + * This will walk the CPU page table. For each virtual address backed by a + * valid page, it updates the src array and takes a reference on the page, in + * order to pin the page until we lock it and unmap it. + */ +static void migrate_vma_collect(struct migrate_vma *migrate) +{ + struct mm_walk mm_walk; + + mm_walk.pmd_entry = migrate_vma_collect_pmd; + mm_walk.pte_entry = NULL; + mm_walk.pte_hole = migrate_vma_collect_hole; + mm_walk.hugetlb_entry = NULL; + mm_walk.test_walk = NULL; + mm_walk.vma = migrate->vma; + mm_walk.mm = migrate->vma->vm_mm; + mm_walk.private = migrate; + + mmu_notifier_invalidate_range_start(mm_walk.mm, + migrate->start, + migrate->end); + walk_page_range(migrate->start, migrate->end, &mm_walk); + mmu_notifier_invalidate_range_end(mm_walk.mm, + migrate->start, + migrate->end); + + migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT); +} + +/* + * migrate_vma_check_page() - check if page is pinned or not + * @page: struct page to check + * + * Pinned pages cannot be migrated. This is the same test as in + * migrate_page_move_mapping(), except that here we allow migration of a + * ZONE_DEVICE page. + */ +static bool migrate_vma_check_page(struct page *page) +{ + /* + * One extra ref because caller holds an extra reference, either from + * isolate_lru_page() for a regular page, or migrate_vma_collect() for + * a device page. + */ + int extra = 1; + + /* + * FIXME support THP (transparent huge page), it is bit more complex to + * check them than regular pages, because they can be mapped with a pmd + * or with a pte (split pte mapping). + */ + if (PageCompound(page)) + return false; + + /* Page from ZONE_DEVICE have one extra reference */ + if (is_zone_device_page(page)) { + /* + * Private page can never be pin as they have no valid pte and + * GUP will fail for those. Yet if there is a pending migration + * a thread might try to wait on the pte migration entry and + * will bump the page reference count. Sadly there is no way to + * differentiate a regular pin from migration wait. Hence to + * avoid 2 racing thread trying to migrate back to CPU to enter + * infinite loop (one stoping migration because the other is + * waiting on pte migration entry). We always return true here. + * + * FIXME proper solution is to rework migration_entry_wait() so + * it does not need to take a reference on page. + */ + if (is_device_private_page(page)) + return true; + + /* + * Only allow device public page to be migrated and account for + * the extra reference count imply by ZONE_DEVICE pages. + */ + if (!is_device_public_page(page)) + return false; + extra++; + } + + /* For file back page */ + if (page_mapping(page)) + extra += 1 + page_has_private(page); + + if ((page_count(page) - extra) > page_mapcount(page)) + return false; + + return true; +} + +/* + * migrate_vma_prepare() - lock pages and isolate them from the lru + * @migrate: migrate struct containing all migration information + * + * This locks pages that have been collected by migrate_vma_collect(). Once each + * page is locked it is isolated from the lru (for non-device pages). Finally, + * the ref taken by migrate_vma_collect() is dropped, as locked pages cannot be + * migrated by concurrent kernel threads. + */ +static void migrate_vma_prepare(struct migrate_vma *migrate) +{ + const unsigned long npages = migrate->npages; + const unsigned long start = migrate->start; + unsigned long addr, i, restore = 0; + bool allow_drain = true; + + lru_add_drain(); + + for (i = 0; (i < npages) && migrate->cpages; i++) { + struct page *page = migrate_pfn_to_page(migrate->src[i]); + bool remap = true; + + if (!page) + continue; + + if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) { + /* + * Because we are migrating several pages there can be + * a deadlock between 2 concurrent migration where each + * are waiting on each other page lock. + * + * Make migrate_vma() a best effort thing and backoff + * for any page we can not lock right away. + */ + if (!trylock_page(page)) { + migrate->src[i] = 0; + migrate->cpages--; + put_page(page); + continue; + } + remap = false; + migrate->src[i] |= MIGRATE_PFN_LOCKED; + } + + /* ZONE_DEVICE pages are not on LRU */ + if (!is_zone_device_page(page)) { + if (!PageLRU(page) && allow_drain) { + /* Drain CPU's pagevec */ + lru_add_drain_all(); + allow_drain = false; + } + + if (isolate_lru_page(page)) { + if (remap) { + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + migrate->cpages--; + restore++; + } else { + migrate->src[i] = 0; + unlock_page(page); + migrate->cpages--; + put_page(page); + } + continue; + } + + /* Drop the reference we took in collect */ + put_page(page); + } + + if (!migrate_vma_check_page(page)) { + if (remap) { + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + migrate->cpages--; + restore++; + + if (!is_zone_device_page(page)) { + get_page(page); + putback_lru_page(page); + } + } else { + migrate->src[i] = 0; + unlock_page(page); + migrate->cpages--; + + if (!is_zone_device_page(page)) + putback_lru_page(page); + else + put_page(page); + } + } + } + + for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) { + struct page *page = migrate_pfn_to_page(migrate->src[i]); + + if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE)) + continue; + + remove_migration_pte(page, migrate->vma, addr, page); + + migrate->src[i] = 0; + unlock_page(page); + put_page(page); + restore--; + } +} + +/* + * migrate_vma_unmap() - replace page mapping with special migration pte entry + * @migrate: migrate struct containing all migration information + * + * Replace page mapping (CPU page table pte) with a special migration pte entry + * and check again if it has been pinned. Pinned pages are restored because we + * cannot migrate them. + * + * This is the last step before we call the device driver callback to allocate + * destination memory and copy contents of original page over to new page. + */ +static void migrate_vma_unmap(struct migrate_vma *migrate) +{ + int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; + const unsigned long npages = migrate->npages; + const unsigned long start = migrate->start; + unsigned long addr, i, restore = 0; + + for (i = 0; i < npages; i++) { + struct page *page = migrate_pfn_to_page(migrate->src[i]); + + if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE)) + continue; + + if (page_mapped(page)) { + try_to_unmap(page, flags); + if (page_mapped(page)) + goto restore; + } + + if (migrate_vma_check_page(page)) + continue; + +restore: + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + migrate->cpages--; + restore++; + } + + for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) { + struct page *page = migrate_pfn_to_page(migrate->src[i]); + + if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE)) + continue; + + remove_migration_ptes(page, page, false); + + migrate->src[i] = 0; + unlock_page(page); + restore--; + + if (is_zone_device_page(page)) + put_page(page); + else + putback_lru_page(page); + } +} + +static void migrate_vma_insert_page(struct migrate_vma *migrate, + unsigned long addr, + struct page *page, + unsigned long *src, + unsigned long *dst) +{ + struct vm_area_struct *vma = migrate->vma; + struct mm_struct *mm = vma->vm_mm; + struct mem_cgroup *memcg; + bool flush = false; + spinlock_t *ptl; + pte_t entry; + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + /* Only allow populating anonymous memory */ + if (!vma_is_anonymous(vma)) + goto abort; + + pgdp = pgd_offset(mm, addr); + p4dp = p4d_alloc(mm, pgdp, addr); + if (!p4dp) + goto abort; + pudp = pud_alloc(mm, p4dp, addr); + if (!pudp) + goto abort; + pmdp = pmd_alloc(mm, pudp, addr); + if (!pmdp) + goto abort; + + if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp)) + goto abort; + + /* + * Use pte_alloc() instead of pte_alloc_map(). We can't run + * pte_offset_map() on pmds where a huge pmd might be created + * from a different thread. + * + * pte_alloc_map() is safe to use under down_write(mmap_sem) or when + * parallel threads are excluded by other means. + * + * Here we only have down_read(mmap_sem). + */ + if (pte_alloc(mm, pmdp, addr)) + goto abort; + + /* See the comment in pte_alloc_one_map() */ + if (unlikely(pmd_trans_unstable(pmdp))) + goto abort; + + if (unlikely(anon_vma_prepare(vma))) + goto abort; + if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false)) + goto abort; + + /* + * The memory barrier inside __SetPageUptodate makes sure that + * preceding stores to the page contents become visible before + * the set_pte_at() write. + */ + __SetPageUptodate(page); + + if (is_zone_device_page(page)) { + if (is_device_private_page(page)) { + swp_entry_t swp_entry; + + swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE); + entry = swp_entry_to_pte(swp_entry); + } else if (is_device_public_page(page)) { + entry = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); + if (vma->vm_flags & VM_WRITE) + entry = pte_mkwrite(pte_mkdirty(entry)); + entry = pte_mkdevmap(entry); + } + } else { + entry = mk_pte(page, vma->vm_page_prot); + if (vma->vm_flags & VM_WRITE) + entry = pte_mkwrite(pte_mkdirty(entry)); + } + + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + + if (pte_present(*ptep)) { + unsigned long pfn = pte_pfn(*ptep); + + if (!is_zero_pfn(pfn)) { + pte_unmap_unlock(ptep, ptl); + mem_cgroup_cancel_charge(page, memcg, false); + goto abort; + } + flush = true; + } else if (!pte_none(*ptep)) { + pte_unmap_unlock(ptep, ptl); + mem_cgroup_cancel_charge(page, memcg, false); + goto abort; + } + + /* + * Check for usefaultfd but do not deliver the fault. Instead, + * just back off. + */ + if (userfaultfd_missing(vma)) { + pte_unmap_unlock(ptep, ptl); + mem_cgroup_cancel_charge(page, memcg, false); + goto abort; + } + + inc_mm_counter(mm, MM_ANONPAGES); + page_add_new_anon_rmap(page, vma, addr, false); + mem_cgroup_commit_charge(page, memcg, false, false); + if (!is_zone_device_page(page)) + lru_cache_add_active_or_unevictable(page, vma); + get_page(page); + + if (flush) { + flush_cache_page(vma, addr, pte_pfn(*ptep)); + ptep_clear_flush_notify(vma, addr, ptep); + set_pte_at_notify(mm, addr, ptep, entry); + update_mmu_cache(vma, addr, ptep); + } else { + /* No need to invalidate - it was non-present before */ + set_pte_at(mm, addr, ptep, entry); + update_mmu_cache(vma, addr, ptep); + } + + pte_unmap_unlock(ptep, ptl); + *src = MIGRATE_PFN_MIGRATE; + return; + +abort: + *src &= ~MIGRATE_PFN_MIGRATE; +} + +/* + * migrate_vma_pages() - migrate meta-data from src page to dst page + * @migrate: migrate struct containing all migration information + * + * This migrates struct page meta-data from source struct page to destination + * struct page. This effectively finishes the migration from source page to the + * destination page. + */ +static void migrate_vma_pages(struct migrate_vma *migrate) +{ + const unsigned long npages = migrate->npages; + const unsigned long start = migrate->start; + struct vm_area_struct *vma = migrate->vma; + struct mm_struct *mm = vma->vm_mm; + unsigned long addr, i, mmu_start; + bool notified = false; + + for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) { + struct page *newpage = migrate_pfn_to_page(migrate->dst[i]); + struct page *page = migrate_pfn_to_page(migrate->src[i]); + struct address_space *mapping; + int r; + + if (!newpage) { + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + continue; + } + + if (!page) { + if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE)) { + continue; + } + if (!notified) { + mmu_start = addr; + notified = true; + mmu_notifier_invalidate_range_start(mm, + mmu_start, + migrate->end); + } + migrate_vma_insert_page(migrate, addr, newpage, + &migrate->src[i], + &migrate->dst[i]); + continue; + } + + mapping = page_mapping(page); + + if (is_zone_device_page(newpage)) { + if (is_device_private_page(newpage)) { + /* + * For now only support private anonymous when + * migrating to un-addressable device memory. + */ + if (mapping) { + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + continue; + } + } else if (!is_device_public_page(newpage)) { + /* + * Other types of ZONE_DEVICE page are not + * supported. + */ + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + continue; + } + } + + r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY); + if (r != MIGRATEPAGE_SUCCESS) + migrate->src[i] &= ~MIGRATE_PFN_MIGRATE; + } + + if (notified) + mmu_notifier_invalidate_range_end(mm, mmu_start, + migrate->end); +} + +/* + * migrate_vma_finalize() - restore CPU page table entry + * @migrate: migrate struct containing all migration information + * + * This replaces the special migration pte entry with either a mapping to the + * new page if migration was successful for that page, or to the original page + * otherwise. + * + * This also unlocks the pages and puts them back on the lru, or drops the extra + * refcount, for device pages. + */ +static void migrate_vma_finalize(struct migrate_vma *migrate) +{ + const unsigned long npages = migrate->npages; + unsigned long i; + + for (i = 0; i < npages; i++) { + struct page *newpage = migrate_pfn_to_page(migrate->dst[i]); + struct page *page = migrate_pfn_to_page(migrate->src[i]); + + if (!page) { + if (newpage) { + unlock_page(newpage); + put_page(newpage); + } + continue; + } + + if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) { + if (newpage) { + unlock_page(newpage); + put_page(newpage); + } + newpage = page; + } + + remove_migration_ptes(page, newpage, false); + unlock_page(page); + migrate->cpages--; + + if (is_zone_device_page(page)) + put_page(page); + else + putback_lru_page(page); + + if (newpage != page) { + unlock_page(newpage); + if (is_zone_device_page(newpage)) + put_page(newpage); + else + putback_lru_page(newpage); + } + } +} + +/* + * migrate_vma() - migrate a range of memory inside vma + * + * @ops: migration callback for allocating destination memory and copying + * @vma: virtual memory area containing the range to be migrated + * @start: start address of the range to migrate (inclusive) + * @end: end address of the range to migrate (exclusive) + * @src: array of hmm_pfn_t containing source pfns + * @dst: array of hmm_pfn_t containing destination pfns + * @private: pointer passed back to each of the callback + * Returns: 0 on success, error code otherwise + * + * This function tries to migrate a range of memory virtual address range, using + * callbacks to allocate and copy memory from source to destination. First it + * collects all the pages backing each virtual address in the range, saving this + * inside the src array. Then it locks those pages and unmaps them. Once the pages + * are locked and unmapped, it checks whether each page is pinned or not. Pages + * that aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) + * in the corresponding src array entry. It then restores any pages that are + * pinned, by remapping and unlocking those pages. + * + * At this point it calls the alloc_and_copy() callback. For documentation on + * what is expected from that callback, see struct migrate_vma_ops comments in + * include/linux/migrate.h + * + * After the alloc_and_copy() callback, this function goes over each entry in + * the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag + * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set, + * then the function tries to migrate struct page information from the source + * struct page to the destination struct page. If it fails to migrate the struct + * page information, then it clears the MIGRATE_PFN_MIGRATE flag in the src + * array. + * + * At this point all successfully migrated pages have an entry in the src + * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst + * array entry with MIGRATE_PFN_VALID flag set. + * + * It then calls the finalize_and_map() callback. See comments for "struct + * migrate_vma_ops", in include/linux/migrate.h for details about + * finalize_and_map() behavior. + * + * After the finalize_and_map() callback, for successfully migrated pages, this + * function updates the CPU page table to point to new pages, otherwise it + * restores the CPU page table to point to the original source pages. + * + * Function returns 0 after the above steps, even if no pages were migrated + * (The function only returns an error if any of the arguments are invalid.) + * + * Both src and dst array must be big enough for (end - start) >> PAGE_SHIFT + * unsigned long entries. + */ +int migrate_vma(const struct migrate_vma_ops *ops, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + unsigned long *src, + unsigned long *dst, + void *private) +{ + struct migrate_vma migrate; + + /* Sanity check the arguments */ + start &= PAGE_MASK; + end &= PAGE_MASK; + if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) + return -EINVAL; + if (start < vma->vm_start || start >= vma->vm_end) + return -EINVAL; + if (end <= vma->vm_start || end > vma->vm_end) + return -EINVAL; + if (!ops || !src || !dst || start >= end) + return -EINVAL; + + memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT)); + migrate.src = src; + migrate.dst = dst; + migrate.start = start; + migrate.npages = 0; + migrate.cpages = 0; + migrate.end = end; + migrate.vma = vma; + + /* Collect, and try to unmap source pages */ + migrate_vma_collect(&migrate); + if (!migrate.cpages) + return 0; + + /* Lock and isolate page */ + migrate_vma_prepare(&migrate); + if (!migrate.cpages) + return 0; + + /* Unmap pages */ + migrate_vma_unmap(&migrate); + if (!migrate.cpages) + return 0; + + /* + * At this point pages are locked and unmapped, and thus they have + * stable content and can safely be copied to destination memory that + * is allocated by the callback. + * + * Note that migration can fail in migrate_vma_struct_page() for each + * individual page. + */ + ops->alloc_and_copy(vma, src, dst, start, end, private); + + /* This does the real migration of struct page */ + migrate_vma_pages(&migrate); + + ops->finalize_and_map(vma, src, dst, start, end, private); + + /* Unlock and remap pages */ + migrate_vma_finalize(&migrate); + + return 0; +} +EXPORT_SYMBOL(migrate_vma); +#endif /* defined(MIGRATE_VMA_HELPER) */ diff --git a/mm/mlock.c b/mm/mlock.c index b562b5523a6544e6c0ae6e4f792943441f6217a3..dfc6f19121768ee85f269a8ddd1d5bc0f7a5b6fe 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -365,8 +365,8 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) * @start + PAGE_SIZE when no page could be added by the pte walk. */ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, - struct vm_area_struct *vma, int zoneid, unsigned long start, - unsigned long end) + struct vm_area_struct *vma, struct zone *zone, + unsigned long start, unsigned long end) { pte_t *pte; spinlock_t *ptl; @@ -394,7 +394,7 @@ static unsigned long __munlock_pagevec_fill(struct pagevec *pvec, * Break if page could not be obtained or the page's node+zone does not * match */ - if (!page || page_zone_id(page) != zoneid) + if (!page || page_zone(page) != zone) break; /* @@ -446,7 +446,6 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, unsigned long page_increm; struct pagevec pvec; struct zone *zone; - int zoneid; pagevec_init(&pvec, 0); /* @@ -481,7 +480,6 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, */ pagevec_add(&pvec, page); zone = page_zone(page); - zoneid = page_zone_id(page); /* * Try to fill the rest of pagevec using fast @@ -490,7 +488,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, * pagevec. */ start = __munlock_pagevec_fill(&pvec, vma, - zoneid, start, end); + zone, start, end); __munlock_pagevec(&pvec, zone); goto next; } diff --git a/mm/mmap.c b/mm/mmap.c index 4c5981651407813ceb51cf7c717d7fbec0d86a40..680506faceae91d9da5347f27f1271a9b542addf 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -685,7 +685,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *next = vma->vm_next, *orig_vma = vma; struct address_space *mapping = NULL; - struct rb_root *root = NULL; + struct rb_root_cached *root = NULL; struct anon_vma *anon_vma = NULL; struct file *file = vma->vm_file; bool start_changed = false, end_changed = false; @@ -3340,7 +3340,7 @@ static DEFINE_MUTEX(mm_all_locks_mutex); static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) { - if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) { + if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) { /* * The LSB of head.next can't change from under us * because we hold the mm_all_locks_mutex. @@ -3356,7 +3356,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma) * anon_vma->root->rwsem. */ if (__test_and_set_bit(0, (unsigned long *) - &anon_vma->root->rb_root.rb_node)) + &anon_vma->root->rb_root.rb_root.rb_node)) BUG(); } } @@ -3458,7 +3458,7 @@ int mm_take_all_locks(struct mm_struct *mm) static void vm_unlock_anon_vma(struct anon_vma *anon_vma) { - if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_node)) { + if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) { /* * The LSB of head.next can't change to 0 from under * us because we hold the mm_all_locks_mutex. @@ -3472,7 +3472,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma) * anon_vma->root->rwsem. */ if (!__test_and_clear_bit(0, (unsigned long *) - &anon_vma->root->rb_root.rb_node)) + &anon_vma->root->rb_root.rb_root.rb_node)) BUG(); anon_vma_unlock_write(anon_vma); } diff --git a/mm/mprotect.c b/mm/mprotect.c index bd0f409922cb2fc133f9fecba64a839380d4f937..6d3e2f0822901605aa3554234e3e96838997b0ba 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -125,6 +125,20 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, pages++; } + + if (is_write_device_private_entry(entry)) { + pte_t newpte; + + /* + * We do not preserve soft-dirtiness. See + * copy_one_pte() for explanation. + */ + make_device_private_entry_read(&entry); + newpte = swp_entry_to_pte(entry); + set_pte_at(mm, addr, pte, newpte); + + pages++; + } } } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); @@ -149,7 +163,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, unsigned long this_pages; next = pmd_addr_end(addr, end); - if (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) + if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) && pmd_none_or_clear_bad(pmd)) continue; @@ -159,7 +173,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, mmu_notifier_invalidate_range_start(mm, mni_start, end); } - if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { + if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) { __split_huge_pmd(vma, pmd, addr, false, NULL); } else { diff --git a/mm/mremap.c b/mm/mremap.c index 7395564daa6c228ab704be92b718112dbb2fb6aa..cfec004c4ff9a95511940039d6975d270bf37996 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -223,7 +223,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr); if (!new_pmd) break; - if (pmd_trans_huge(*old_pmd)) { + if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd)) { if (extent == HPAGE_PMD_SIZE) { bool moved; /* See comment in move_ptes() */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a9add06fe7687eba0812a112f5aa213693401d9d..c841af88836ad63d548a2f007220203e4a8ddf9a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2741,18 +2741,18 @@ int __isolate_free_page(struct page *page, unsigned int order) static inline void zone_statistics(struct zone *preferred_zone, struct zone *z) { #ifdef CONFIG_NUMA - enum zone_stat_item local_stat = NUMA_LOCAL; + enum numa_stat_item local_stat = NUMA_LOCAL; if (z->node != numa_node_id()) local_stat = NUMA_OTHER; if (z->node == preferred_zone->node) - __inc_zone_state(z, NUMA_HIT); + __inc_numa_state(z, NUMA_HIT); else { - __inc_zone_state(z, NUMA_MISS); - __inc_zone_state(preferred_zone, NUMA_FOREIGN); + __inc_numa_state(z, NUMA_MISS); + __inc_numa_state(preferred_zone, NUMA_FOREIGN); } - __inc_zone_state(z, local_stat); + __inc_numa_state(z, local_stat); #endif } @@ -4183,10 +4183,11 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, { struct page *page; unsigned int alloc_flags = ALLOC_WMARK_LOW; - gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */ + gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { }; gfp_mask &= gfp_allowed_mask; + alloc_mask = gfp_mask; if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags)) return NULL; diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 8ec6ba230bb9dabba967489f0ba8f15ca61b8806..6a03946469a99eb535851194f519893a5e8a2d11 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -48,6 +48,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) if (!is_swap_pte(*pvmw->pte)) return false; entry = pte_to_swp_entry(*pvmw->pte); + if (!is_migration_entry(entry)) return false; if (migration_entry_to_page(entry) - pvmw->page >= @@ -60,6 +61,15 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) WARN_ON_ONCE(1); #endif } else { + if (is_swap_pte(*pvmw->pte)) { + swp_entry_t entry; + + entry = pte_to_swp_entry(*pvmw->pte); + if (is_device_private_entry(entry) && + device_private_entry_to_page(entry) == pvmw->page) + return true; + } + if (!pte_present(*pvmw->pte)) return false; @@ -138,16 +148,28 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (!pud_present(*pud)) return false; pvmw->pmd = pmd_offset(pud, pvmw->address); - if (pmd_trans_huge(*pvmw->pmd)) { + if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); - if (!pmd_present(*pvmw->pmd)) - return not_found(pvmw); if (likely(pmd_trans_huge(*pvmw->pmd))) { if (pvmw->flags & PVMW_MIGRATION) return not_found(pvmw); if (pmd_page(*pvmw->pmd) != page) return not_found(pvmw); return true; + } else if (!pmd_present(*pvmw->pmd)) { + if (thp_migration_supported()) { + if (!(pvmw->flags & PVMW_MIGRATION)) + return not_found(pvmw); + if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) { + swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd); + + if (migration_entry_to_page(entry) != page) + return not_found(pvmw); + return true; + } + } else + WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!"); + return not_found(pvmw); } else { /* THP pmd was split under us: handle on pte level */ spin_unlock(pvmw->ptl); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index c99d9512a45b8a1599f0d679ec92d9e4511b3d68..1175f6a24fdb09d49af7ab93061b56b527d554a1 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -124,7 +124,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); + VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && + !pmd_devmap(*pmdp)) || !pmd_present(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; diff --git a/mm/rmap.c b/mm/rmap.c index c570f82e6827153316465b9e18f0fca376a1c1a1..b874c4761e8422829610d9a1173c56139db5823b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -63,6 +63,7 @@ #include #include #include +#include #include @@ -390,7 +391,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma) * Leave empty anon_vmas on the list - we'll need * to free them outside the lock. */ - if (RB_EMPTY_ROOT(&anon_vma->rb_root)) { + if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) { anon_vma->parent->degree--; continue; } @@ -424,7 +425,7 @@ static void anon_vma_ctor(void *data) init_rwsem(&anon_vma->rwsem); atomic_set(&anon_vma->refcount, 0); - anon_vma->rb_root = RB_ROOT; + anon_vma->rb_root = RB_ROOT_CACHED; } void __init anon_vma_init(void) @@ -1346,9 +1347,13 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) return true; + if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && + is_zone_device_page(page) && !is_device_private_page(page)) + return true; + if (flags & TTU_SPLIT_HUGE_PMD) { split_huge_pmd_address(vma, address, - flags & TTU_MIGRATION, page); + flags & TTU_SPLIT_FREEZE, page); } /* @@ -1360,6 +1365,19 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); while (page_vma_mapped_walk(&pvmw)) { +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION + /* PMD-mapped THP migration entry */ + if (!pvmw.pte && (flags & TTU_MIGRATION)) { + VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page); + + if (!PageAnon(page)) + continue; + + set_pmd_migration_entry(&pvmw, page); + continue; + } +#endif + /* * If the page is mlock()d, we cannot swap it out. * If it's recently referenced (perhaps page_referenced @@ -1390,6 +1408,27 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, address = pvmw.address; + if (IS_ENABLED(CONFIG_MIGRATION) && + (flags & TTU_MIGRATION) && + is_zone_device_page(page)) { + swp_entry_t entry; + pte_t swp_pte; + + pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte); + + /* + * Store the pfn of the page in a special migration + * pte. do_swap_page() will wait until the migration + * pte is removed and then restart fault handling. + */ + entry = make_migration_entry(page, 0); + swp_pte = swp_entry_to_pte(entry); + if (pte_soft_dirty(pteval)) + swp_pte = pte_swp_mksoft_dirty(swp_pte); + set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); + goto discard; + } + if (!(flags & TTU_IGNORE_ACCESS)) { if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) { @@ -1445,7 +1484,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, */ dec_mm_counter(mm, mm_counter(page)); } else if (IS_ENABLED(CONFIG_MIGRATION) && - (flags & TTU_MIGRATION)) { + (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) { swp_entry_t entry; pte_t swp_pte; /* @@ -1575,7 +1614,8 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) * locking requirements of exec(), migration skips * temporary VMAs until after exec() completes. */ - if ((flags & TTU_MIGRATION) && !PageKsm(page) && PageAnon(page)) + if ((flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE)) + && !PageKsm(page) && PageAnon(page)) rwc.invalid_vma = invalid_migration_vma; if (flags & TTU_RMAP_LOCKED) diff --git a/mm/slub.c b/mm/slub.c index ddb04576b342a17f2b6c93c3434ca59aef76082c..d39a5d3834b31c0ab53d8d808c1224dbb2076a2a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4232,7 +4232,7 @@ void __init kmem_cache_init(void) cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL, slub_cpu_dead); - pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n", + pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%u, Nodes=%d\n", cache_line_size(), slub_min_order, slub_max_order, slub_min_objects, nr_cpu_ids, nr_node_ids); diff --git a/mm/sparse.c b/mm/sparse.c index a9783acf2bb9c3caee10ff8cfd3a9228ac73b4a9..83b3bf6461af556698c457627b0bc732725a0300 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -626,7 +626,7 @@ void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn) unsigned long pfn; for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { - unsigned long section_nr = pfn_to_section_nr(start_pfn); + unsigned long section_nr = pfn_to_section_nr(pfn); struct mem_section *ms; /* onlining code should never touch invalid ranges */ diff --git a/mm/swap.c b/mm/swap.c index 62d96b8e5eb3b01a51f48bcde87aabf591c72d96..9295ae960d6680165f67db4405698c3b48e3b84e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -765,6 +765,17 @@ void release_pages(struct page **pages, int nr, bool cold) if (is_huge_zero_page(page)) continue; + /* Device public page can not be huge page */ + if (is_device_public_page(page)) { + if (locked_pgdat) { + spin_unlock_irqrestore(&locked_pgdat->lru_lock, + flags); + locked_pgdat = NULL; + } + put_zone_device_private_or_public_page(page); + continue; + } + page = compound_head(page); if (!put_page_testzero(page)) continue; diff --git a/mm/swapfile.c b/mm/swapfile.c index d483278ee35b1fe9f728ec93c867dcd7621fd4af..bf91dc9e7a791d38b28ab84f10b57892fc0af9dc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3290,7 +3290,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->flags = 0; spin_unlock(&swap_lock); vfree(swap_map); - vfree(cluster_info); + kvfree(cluster_info); + kvfree(frontswap_map); if (swap_file) { if (inode && S_ISREG(inode->i_mode)) { inode_unlock(inode); diff --git a/mm/vmstat.c b/mm/vmstat.c index c7e4b84580235624f1c877ba8b921c577ad64f47..4bb13e72ac97c58e98cc77227a5107cd2c34c471 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -30,6 +30,8 @@ #include "internal.h" +#define NUMA_STATS_THRESHOLD (U16_MAX - 2) + #ifdef CONFIG_VM_EVENT_COUNTERS DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}}; EXPORT_PER_CPU_SYMBOL(vm_event_states); @@ -87,8 +89,10 @@ void vm_events_fold_cpu(int cpu) * vm_stat contains the global counters */ atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS] __cacheline_aligned_in_smp; +atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS] __cacheline_aligned_in_smp; atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS] __cacheline_aligned_in_smp; EXPORT_SYMBOL(vm_zone_stat); +EXPORT_SYMBOL(vm_numa_stat); EXPORT_SYMBOL(vm_node_stat); #ifdef CONFIG_SMP @@ -604,6 +608,32 @@ EXPORT_SYMBOL(dec_node_page_state); * Fold a differential into the global counters. * Returns the number of counters updated. */ +#ifdef CONFIG_NUMA +static int fold_diff(int *zone_diff, int *numa_diff, int *node_diff) +{ + int i; + int changes = 0; + + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) + if (zone_diff[i]) { + atomic_long_add(zone_diff[i], &vm_zone_stat[i]); + changes++; + } + + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) + if (numa_diff[i]) { + atomic_long_add(numa_diff[i], &vm_numa_stat[i]); + changes++; + } + + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + if (node_diff[i]) { + atomic_long_add(node_diff[i], &vm_node_stat[i]); + changes++; + } + return changes; +} +#else static int fold_diff(int *zone_diff, int *node_diff) { int i; @@ -622,6 +652,7 @@ static int fold_diff(int *zone_diff, int *node_diff) } return changes; } +#endif /* CONFIG_NUMA */ /* * Update the zone counters for the current cpu. @@ -645,6 +676,9 @@ static int refresh_cpu_vm_stats(bool do_pagesets) struct zone *zone; int i; int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; +#ifdef CONFIG_NUMA + int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, }; +#endif int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; int changes = 0; @@ -666,6 +700,18 @@ static int refresh_cpu_vm_stats(bool do_pagesets) } } #ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) { + int v; + + v = this_cpu_xchg(p->vm_numa_stat_diff[i], 0); + if (v) { + + atomic_long_add(v, &zone->vm_numa_stat[i]); + global_numa_diff[i] += v; + __this_cpu_write(p->expire, 3); + } + } + if (do_pagesets) { cond_resched(); /* @@ -712,7 +758,12 @@ static int refresh_cpu_vm_stats(bool do_pagesets) } } +#ifdef CONFIG_NUMA + changes += fold_diff(global_zone_diff, global_numa_diff, + global_node_diff); +#else changes += fold_diff(global_zone_diff, global_node_diff); +#endif return changes; } @@ -727,6 +778,9 @@ void cpu_vm_stats_fold(int cpu) struct zone *zone; int i; int global_zone_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, }; +#ifdef CONFIG_NUMA + int global_numa_diff[NR_VM_NUMA_STAT_ITEMS] = { 0, }; +#endif int global_node_diff[NR_VM_NODE_STAT_ITEMS] = { 0, }; for_each_populated_zone(zone) { @@ -743,6 +797,18 @@ void cpu_vm_stats_fold(int cpu) atomic_long_add(v, &zone->vm_stat[i]); global_zone_diff[i] += v; } + +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) + if (p->vm_numa_stat_diff[i]) { + int v; + + v = p->vm_numa_stat_diff[i]; + p->vm_numa_stat_diff[i] = 0; + atomic_long_add(v, &zone->vm_numa_stat[i]); + global_numa_diff[i] += v; + } +#endif } for_each_online_pgdat(pgdat) { @@ -761,7 +827,11 @@ void cpu_vm_stats_fold(int cpu) } } +#ifdef CONFIG_NUMA + fold_diff(global_zone_diff, global_numa_diff, global_node_diff); +#else fold_diff(global_zone_diff, global_node_diff); +#endif } /* @@ -779,10 +849,36 @@ void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset) atomic_long_add(v, &zone->vm_stat[i]); atomic_long_add(v, &vm_zone_stat[i]); } + +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) + if (pset->vm_numa_stat_diff[i]) { + int v = pset->vm_numa_stat_diff[i]; + + pset->vm_numa_stat_diff[i] = 0; + atomic_long_add(v, &zone->vm_numa_stat[i]); + atomic_long_add(v, &vm_numa_stat[i]); + } +#endif } #endif #ifdef CONFIG_NUMA +void __inc_numa_state(struct zone *zone, + enum numa_stat_item item) +{ + struct per_cpu_pageset __percpu *pcp = zone->pageset; + u16 __percpu *p = pcp->vm_numa_stat_diff + item; + u16 v; + + v = __this_cpu_inc_return(*p); + + if (unlikely(v > NUMA_STATS_THRESHOLD)) { + zone_numa_state_add(v, zone, item); + __this_cpu_write(*p, 0); + } +} + /* * Determine the per node value of a stat item. This function * is called frequently in a NUMA machine, so try to be as @@ -801,6 +897,23 @@ unsigned long sum_zone_node_page_state(int node, return count; } +/* + * Determine the per node value of a numa stat item. To avoid deviation, + * the per cpu stat number in vm_numa_stat_diff[] is also included. + */ +unsigned long sum_zone_numa_state(int node, + enum numa_stat_item item) +{ + struct zone *zones = NODE_DATA(node)->node_zones; + int i; + unsigned long count = 0; + + for (i = 0; i < MAX_NR_ZONES; i++) + count += zone_numa_state_snapshot(zones + i, item); + + return count; +} + /* * Determine the per node value of a stat item. */ @@ -937,6 +1050,9 @@ const char * const vmstat_text[] = { #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", #endif + "nr_free_cma", + + /* enum numa_stat_item counters */ #ifdef CONFIG_NUMA "numa_hit", "numa_miss", @@ -945,7 +1061,6 @@ const char * const vmstat_text[] = { "numa_local", "numa_other", #endif - "nr_free_cma", /* Node-based counters */ "nr_inactive_anon", @@ -1106,7 +1221,6 @@ const char * const vmstat_text[] = { }; #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */ - #if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_COMPACTION)) || \ defined(CONFIG_PROC_FS) static void *frag_start(struct seq_file *m, loff_t *pos) @@ -1384,7 +1498,8 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, seq_printf(m, "\n per-node stats"); for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { seq_printf(m, "\n %-12s %lu", - vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], + vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS], node_page_state(pgdat, i)); } } @@ -1421,6 +1536,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, seq_printf(m, "\n %-12s %lu", vmstat_text[i], zone_page_state(zone, i)); +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) + seq_printf(m, "\n %-12s %lu", + vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], + zone_numa_state_snapshot(zone, i)); +#endif + seq_printf(m, "\n pagesets"); for_each_online_cpu(i) { struct per_cpu_pageset *pageset; @@ -1497,6 +1619,7 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) if (*pos >= ARRAY_SIZE(vmstat_text)) return NULL; stat_items_size = NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) + + NR_VM_NUMA_STAT_ITEMS * sizeof(unsigned long) + NR_VM_NODE_STAT_ITEMS * sizeof(unsigned long) + NR_VM_WRITEBACK_STAT_ITEMS * sizeof(unsigned long); @@ -1512,6 +1635,12 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) v[i] = global_zone_page_state(i); v += NR_VM_ZONE_STAT_ITEMS; +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) + v[i] = global_numa_state(i); + v += NR_VM_NUMA_STAT_ITEMS; +#endif + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) v[i] = global_node_page_state(i); v += NR_VM_NODE_STAT_ITEMS; @@ -1613,6 +1742,16 @@ int vmstat_refresh(struct ctl_table *table, int write, err = -EINVAL; } } +#ifdef CONFIG_NUMA + for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) { + val = atomic_long_read(&vm_numa_stat[i]); + if (val < 0) { + pr_warn("%s: %s %ld\n", + __func__, vmstat_text[i + NR_VM_ZONE_STAT_ITEMS], val); + err = -EINVAL; + } + } +#endif if (err) return err; if (write) @@ -1654,13 +1793,20 @@ static bool need_update(int cpu) struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu); BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1); +#ifdef CONFIG_NUMA + BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2); +#endif + /* * The fast way of checking if there are any vmstat diffs. * This works because the diffs are byte sized items. */ if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS)) return true; - +#ifdef CONFIG_NUMA + if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS)) + return true; +#endif } return false; } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 62457eb823306111a44f172897cbc7aecd56134c..7c38e850a8fc5ce0c4f64090e9f9ac6ef6de7203 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -551,20 +551,23 @@ static int get_size_class_index(int size) return min_t(int, ZS_SIZE_CLASSES - 1, idx); } +/* type can be of enum type zs_stat_type or fullness_group */ static inline void zs_stat_inc(struct size_class *class, - enum zs_stat_type type, unsigned long cnt) + int type, unsigned long cnt) { class->stats.objs[type] += cnt; } +/* type can be of enum type zs_stat_type or fullness_group */ static inline void zs_stat_dec(struct size_class *class, - enum zs_stat_type type, unsigned long cnt) + int type, unsigned long cnt) { class->stats.objs[type] -= cnt; } +/* type can be of enum type zs_stat_type or fullness_group */ static inline unsigned long zs_stat_get(struct size_class *class, - enum zs_stat_type type) + int type) { return class->stats.objs[type]; } @@ -1969,6 +1972,14 @@ int zs_page_migrate(struct address_space *mapping, struct page *newpage, unsigned int obj_idx; int ret = -EAGAIN; + /* + * We cannot support the _NO_COPY case here, because copy needs to + * happen under the zs lock, which does not work with + * MIGRATE_SYNC_NO_COPY workflow. + */ + if (mode == MIGRATE_SYNC_NO_COPY) + return -EINVAL; + VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 2287a0bca863bf57ffabaae9de58aab0174826c3..dd2c262aebbf2a43283f38badcf445702c59c9e9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -145,7 +145,8 @@ sub list_types { close($script); my @types = (); - for ($text =~ /\b(?:(?:CHK|WARN|ERROR)\s*\(\s*"([^"]+)")/g) { + # Also catch when type or level is passed through a variable + for ($text =~ /(?:(?:\bCHK|\bWARN|\bERROR|&\{\$msg_level})\s*\(|\$msg_type\s*=)\s*"([^"]+)"/g) { push (@types, $_); } @types = sort(uniq(@types)); @@ -2715,10 +2716,10 @@ sub process { my $typo_fix = $spelling_fix{lc($typo)}; $typo_fix = ucfirst($typo_fix) if ($typo =~ /^[A-Z]/); $typo_fix = uc($typo_fix) if ($typo =~ /^[A-Z]+$/); - my $msg_type = \&WARN; - $msg_type = \&CHK if ($file); - if (&{$msg_type}("TYPO_SPELLING", - "'$typo' may be misspelled - perhaps '$typo_fix'?\n" . $herecurr) && + my $msg_level = \&WARN; + $msg_level = \&CHK if ($file); + if (&{$msg_level}("TYPO_SPELLING", + "'$typo' may be misspelled - perhaps '$typo_fix'?\n" . $herecurr) && $fix) { $fixed[$fixlinenr] =~ s/(^|[^A-Za-z@])($typo)($|[^A-Za-z@])/$1$typo_fix$3/; } @@ -2753,10 +2754,10 @@ sub process { $rawline =~ /\b59\s+Temple\s+Pl/i || $rawline =~ /\b51\s+Franklin\s+St/i) { my $herevet = "$here\n" . cat_vet($rawline) . "\n"; - my $msg_type = \&ERROR; - $msg_type = \&CHK if ($file); - &{$msg_type}("FSF_MAILING_ADDRESS", - "Do not include the paragraph about writing to the Free Software Foundation's mailing address from the sample GPL notice. The FSF has changed addresses in the past, and may do so again. Linux already includes a copy of the GPL.\n" . $herevet) + my $msg_level = \&ERROR; + $msg_level = \&CHK if ($file); + &{$msg_level}("FSF_MAILING_ADDRESS", + "Do not include the paragraph about writing to the Free Software Foundation's mailing address from the sample GPL notice. The FSF has changed addresses in the past, and may do so again. Linux already includes a copy of the GPL.\n" . $herevet) } # check for Kconfig help text having a real description @@ -2875,7 +2876,7 @@ sub process { # #defines that are a single string # # There are 3 different line length message types: -# LONG_LINE_COMMENT a comment starts before but extends beyond $max_linelength +# LONG_LINE_COMMENT a comment starts before but extends beyond $max_line_length # LONG_LINE_STRING a string starts before but extends beyond $max_line_length # LONG_LINE all other lines longer than $max_line_length # @@ -3810,10 +3811,10 @@ sub process { # avoid BUG() or BUG_ON() if ($line =~ /\b(?:BUG|BUG_ON)\b/) { - my $msg_type = \&WARN; - $msg_type = \&CHK if ($file); - &{$msg_type}("AVOID_BUG", - "Avoid crashing the kernel - try using WARN_ON & recovery code rather than BUG() or BUG_ON()\n" . $herecurr); + my $msg_level = \&WARN; + $msg_level = \&CHK if ($file); + &{$msg_level}("AVOID_BUG", + "Avoid crashing the kernel - try using WARN_ON & recovery code rather than BUG() or BUG_ON()\n" . $herecurr); } # avoid LINUX_VERSION_CODE @@ -4339,11 +4340,11 @@ sub process { # messages are ERROR, but ?: are CHK if ($ok == 0) { - my $msg_type = \&ERROR; - $msg_type = \&CHK if (($op eq '?:' || $op eq '?' || $op eq ':') && $ctx =~ /VxV/); + my $msg_level = \&ERROR; + $msg_level = \&CHK if (($op eq '?:' || $op eq '?' || $op eq ':') && $ctx =~ /VxV/); - if (&{$msg_type}("SPACING", - "spaces required around that '$op' $at\n" . $hereptr)) { + if (&{$msg_level}("SPACING", + "spaces required around that '$op' $at\n" . $hereptr)) { $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; if (defined $fix_elements[$n + 2]) { $fix_elements[$n + 2] =~ s/^\s+//; @@ -4496,6 +4497,30 @@ sub process { } } +# check for unnecessary parentheses around comparisons in if uses + if ($^V && $^V ge 5.10.0 && defined($stat) && + $stat =~ /(^.\s*if\s*($balanced_parens))/) { + my $if_stat = $1; + my $test = substr($2, 1, -1); + my $herectx; + while ($test =~ /(?:^|[^\w\&\!\~])+\s*\(\s*([\&\!\~]?\s*$Lval\s*(?:$Compare\s*$FuncArg)?)\s*\)/g) { + my $match = $1; + # avoid parentheses around potential macro args + next if ($match =~ /^\s*\w+\s*$/); + if (!defined($herectx)) { + $herectx = $here . "\n"; + my $cnt = statement_rawlines($if_stat); + for (my $n = 0; $n < $cnt; $n++) { + my $rl = raw_line($linenr, $n); + $herectx .= $rl . "\n"; + last if $rl =~ /^[ \+].*\{/; + } + } + CHK("UNNECESSARY_PARENTHESES", + "Unnecessary parentheses around '$match'\n" . $herectx); + } + } + #goto labels aren't indented, allow a single space however if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) { diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index f1005a31c709ff61e2292ec12928d08f73e6f136..68feae262476492ae3a0b30855dbb09e96fa187e 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -30,9 +30,6 @@ #include "pcm512x.h" -#define DIV_ROUND_DOWN_ULL(ll, d) \ - ({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; }) - #define PCM512x_NUM_SUPPLIES 3 static const char * const pcm512x_supply_names[PCM512x_NUM_SUPPLIES] = { "AVDD", diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c index a5a4da856dfedd7178fcaccd2f859c7bf3eb10b8..73684c4a1ed6929ddf5482119c009cd0c9ce1694 100644 --- a/tools/testing/selftests/kcmp/kcmp_test.c +++ b/tools/testing/selftests/kcmp/kcmp_test.c @@ -8,7 +8,6 @@ #include #include #include - #include #include @@ -16,20 +15,28 @@ #include #include #include +#include #include "../kselftest.h" -static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2) +static long sys_kcmp(int pid1, int pid2, int type, unsigned long fd1, unsigned long fd2) { return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2); } +static const unsigned int duped_num = 64; + int main(int argc, char **argv) { const char kpath[] = "kcmp-test-file"; + struct kcmp_epoll_slot epoll_slot; + struct epoll_event ev; int pid1, pid2; + int pipefd[2]; int fd1, fd2; + int epollfd; int status; + int fddup; fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644); pid1 = getpid(); @@ -39,6 +46,37 @@ int main(int argc, char **argv) ksft_exit_fail(); } + if (pipe(pipefd)) { + perror("Can't create pipe"); + ksft_exit_fail(); + } + + epollfd = epoll_create1(0); + if (epollfd < 0) { + perror("epoll_create1 failed"); + ksft_exit_fail(); + } + + memset(&ev, 0xff, sizeof(ev)); + ev.events = EPOLLIN | EPOLLOUT; + + if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipefd[0], &ev)) { + perror("epoll_ctl failed"); + ksft_exit_fail(); + } + + fddup = dup2(pipefd[1], duped_num); + if (fddup < 0) { + perror("dup2 failed"); + ksft_exit_fail(); + } + + if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fddup, &ev)) { + perror("epoll_ctl failed"); + ksft_exit_fail(); + } + close(fddup); + pid2 = fork(); if (pid2 < 0) { perror("fork failed"); @@ -95,6 +133,24 @@ int main(int argc, char **argv) ksft_inc_pass_cnt(); } + /* Compare epoll target */ + epoll_slot = (struct kcmp_epoll_slot) { + .efd = epollfd, + .tfd = duped_num, + .toff = 0, + }; + ret = sys_kcmp(pid1, pid1, KCMP_EPOLL_TFD, pipefd[1], + (unsigned long)(void *)&epoll_slot); + if (ret) { + printf("FAIL: 0 expected but %d returned (%s)\n", + ret, strerror(errno)); + ksft_inc_fail_cnt(); + ret = -1; + } else { + printf("PASS: 0 returned as expected\n"); + ksft_inc_pass_cnt(); + } + ksft_print_cnts(); if (ret)