diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index 6ec844708f838d8667a56ea7cd3c33f4bd9884a5..ac637359e158bc8d79685b9beadf50bda67f13df 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -258,6 +258,8 @@ extern int mg_sp_group_add_task(int pid, unsigned long prot, int spg_id);
 extern int sp_group_add_task(int pid, int spg_id);
 
 extern void sp_area_drop(struct vm_area_struct *vma);
+extern int sp_group_exit(struct mm_struct *mm);
+extern void sp_group_post_exit(struct mm_struct *mm);
 
 static inline bool sp_is_enabled(void)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 454b42af1de85fde59f3f783351a42b151c5ff21..bf27ee90ad2392f32dc7df3d9b87b29caacd0695 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -98,6 +98,7 @@
 #include <linux/scs.h>
 #include <linux/io_uring.h>
+#include <linux/share_pool.h>
 
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1092,6 +1093,9 @@ static inline void __mmput(struct mm_struct *mm)
 	ksm_exit(mm);
 	khugepaged_exit(mm); /* must run before exit_mmap */
 	exit_mmap(mm);
+
+	sp_group_post_exit(mm);
+
 	mm_put_huge_zero_page(mm);
 	set_mm_exe_file(mm, NULL);
 	if (!list_empty(&mm->mmlist)) {
@@ -1111,6 +1115,9 @@ void mmput(struct mm_struct *mm)
 {
 	might_sleep();
 
+	if (sp_group_exit(mm))
+		return;
+
 	if (atomic_dec_and_test(&mm->mm_users))
 		__mmput(mm);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index d5a97a56dca7b9e1d2a5ac9f7cffdf456d97c43c..c616e99e7672208baf46a93afb3f597bac17da02 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -183,6 +183,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
+	sp_area_drop(vma);
 	vm_area_free(vma);
 	return next;
 }
@@ -1174,6 +1175,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 	if (vm_flags & VM_SPECIAL)
 		return NULL;
 
+	/* don't merge this kind of vma, as an sp_area cannot be merged */
+	if (sp_check_vm_share_pool(vm_flags))
+		return NULL;
+
 	next = vma_next(mm, prev);
 	area = next;
 	if (area && area->vm_end == end)		/* cases 6, 7, 8 */
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 8dc64232f0db15caf2127c7bd8aa356442e9c45a..96fc899617a572e719e38b3441203db871689116 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -4139,6 +4139,134 @@ static void __init proc_sharepool_init(void)
 
 /*** End of statistical and maintenance functions ***/
 
+#define MM_WOULD_FREE	1
+
+/*
+ * Recall that we deliberately increase mm->mm_users by 1 in sp_group_add_task().
+ * If mm_users == sp_group_master->count + 1, the mm is ready to be freed,
+ * because the last owner of this mm is in the exit procedure:
+ * do_exit() -> exit_mm() -> mmput() -> sp_group_exit() -> THIS function.
+ */
+static bool need_free_sp_group(struct mm_struct *mm,
+			       struct sp_group_master *master)
+{
+	/* a thread exits but the process is still alive */
+	if ((unsigned int)atomic_read(&mm->mm_users) != master->count + MM_WOULD_FREE) {
+		if (atomic_dec_and_test(&mm->mm_users))
+			WARN(1, "Invalid user counting\n");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Return:
+ * 1 - let mmput() return immediately
+ * 0 - let mmput() decrease mm_users and try __mmput()
+ */
+int sp_group_exit(struct mm_struct *mm)
+{
+	struct sp_group *spg;
+	struct sp_group_master *master;
+	struct sp_group_node *spg_node, *tmp;
+	bool is_alive = true;
+
+	if (!sp_is_enabled())
+		return 0;
+
+	down_write(&sp_group_sem);
+
+	master = mm->sp_group_master;
+	if (!master) {
+		up_write(&sp_group_sem);
+		return 0;
+	}
+
+	if (!need_free_sp_group(mm, master)) {
+		up_write(&sp_group_sem);
+		return 1;
+	}
+
+	list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) {
+		spg = spg_node->spg;
+
+		down_write(&spg->rw_lock);
+		/* a dead group should NOT be reactivated */
+		if (spg_valid(spg) && list_is_singular(&spg->procs))
+			is_alive = spg->is_alive = false;
+		spg->proc_num--;
+		list_del(&spg_node->proc_node);
+		up_write(&spg->rw_lock);
+
+		if (!is_alive)
+			blocking_notifier_call_chain(&sp_notifier_chain, 0,
+						     spg);
+	}
+
+	/* match with get_task_mm() in sp_group_add_task() */
+	if (atomic_sub_and_test(master->count, &mm->mm_users)) {
+		up_write(&sp_group_sem);
+		WARN(1, "Invalid user counting\n");
+		return 1;
+	}
+
+	up_write(&sp_group_sem);
+	return 0;
+}
+
+void sp_group_post_exit(struct mm_struct *mm)
+{
+	struct sp_proc_stat *stat;
+	long alloc_size, k2u_size;
+	/* lockless visit */
+	struct sp_group_master *master = mm->sp_group_master;
+	struct sp_group_node *spg_node, *tmp;
+	struct sp_group *spg;
+
+	if (!sp_is_enabled() || !master)
+		return;
+
+	/*
+	 * There are two basic scenarios where a process in the share pool is
+	 * exiting but its share pool memory usage is not 0.
+	 * 1. Process A called sp_alloc(), but it terminates without calling
+	 *    sp_free(). Then its share pool memory usage is a positive number.
+	 * 2. Process A never called sp_alloc(), and process B in the same spg
+	 *    called sp_alloc() to get an addr u. Then A gets u somehow and
+	 *    calls sp_free(u). Now A's share pool memory usage is a negative
+	 *    number, while B's memory usage will be a positive number.
+	 *
+	 * We choose to print an info message in both scenarios.
+	 *
+	 * A process that is not in any sp group needs no message, since there
+	 * won't be any unfreed memory.
+	 */
+	stat = sp_get_proc_stat(mm);
+	if (stat) {
+		alloc_size = atomic64_read(&stat->alloc_size);
+		k2u_size = atomic64_read(&stat->k2u_size);
+
+		if (alloc_size != 0 || k2u_size != 0)
+			pr_info("process %s(%d) exits. It applied %ld aligned KB, k2u shared %ld aligned KB\n",
+				stat->comm, stat->tgid,
+				byte2kb(alloc_size), byte2kb(k2u_size));
+
+		/* match with sp_init_proc_stat(); stat is expected to be released after this call */
+		sp_proc_stat_drop(stat);
+	}
+
+	/* lockless traverse */
+	list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) {
+		spg = spg_node->spg;
+		/* match with the refcount inc in sp_group_add_task() */
+		sp_group_drop(spg);
+		kfree(spg_node);
+	}
+
+	kfree(master);
+}
+
 DEFINE_STATIC_KEY_FALSE(share_pool_enabled_key);
 
 static int __init enable_share_pool(char *s)
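A note on the reference-counting scheme the patch relies on: sp_group_add_task() takes one extra mm_users reference per joined group, so the last owner at exit sees mm_users == master->count + 1; sp_group_exit() then strips all group references in a single atomic_sub_and_test() and leaves exactly one count for mmput() to release through __mmput(). The stand-alone C sketch below models that arithmetic in user space. It is illustrative only and not part of the patch: struct mm_model, need_free() and the printed traces are invented for this example.

/* Hedged sketch: a user-space model of the mm_users accounting above.
 * Names echo the kernel code, but everything here (struct mm_model,
 * need_free, the local MM_WOULD_FREE) is illustrative, not kernel code.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MM_WOULD_FREE 1	/* the exiting owner's own reference */

struct mm_model {
	atomic_int mm_users;	/* stands in for mm->mm_users */
	int count;		/* stands in for sp_group_master->count */
};

/* Mirrors need_free_sp_group(): the mm may be torn down only when the
 * remaining references are the group pins plus the exiting owner. */
static bool need_free(struct mm_model *mm)
{
	return atomic_load(&mm->mm_users) == mm->count + MM_WOULD_FREE;
}

int main(void)
{
	struct mm_model mm = { .count = 0 };

	atomic_init(&mm.mm_users, 1);		/* the task owns its mm */

	/* sp_group_add_task() pins the mm once per joined group */
	for (int g = 0; g < 2; g++) {
		atomic_fetch_add(&mm.mm_users, 1);
		mm.count++;
	}

	atomic_fetch_add(&mm.mm_users, 1);	/* a sibling thread holds the mm */
	printf("thread alive: need_free = %d\n", need_free(&mm));	/* 0 */

	atomic_fetch_sub(&mm.mm_users, 1);	/* the sibling thread exits */
	printf("last owner:   need_free = %d\n", need_free(&mm));	/* 1 */

	/* sp_group_exit() drops all group pins at once, leaving exactly
	 * one count for mmput() to release through __mmput() */
	atomic_fetch_sub(&mm.mm_users, mm.count);
	printf("left for __mmput: %d\n", atomic_load(&mm.mm_users));	/* 1 */
	return 0;
}

In the kernel itself the check and the subtraction both run under sp_group_sem held for write, so they cannot race with a concurrent sp_group_add_task(); the model above omits that locking.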