Commit e9f34a08, authored by Yu Kuai, committed by Zheng Zengkai

eulerfs: add dependency operations

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I40JRR
CVE: NA

--------------------------------------

Operations such as mkdir / rmdir create a dependency node. The node is
inserted into the inode's i_dep_list and is handled later by the
persistence (persister) threads; a hedged caller-side sketch follows below.
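A minimal caller-side sketch (not part of this patch) of how a directory
operation such as rmdir might allocate and queue a DEP_DIRREM node.
eufs_alloc_dep_node() is assumed here as the allocation counterpart of
eufs_free_dep_node() used in dep.c, and prevde / nv_header / seq are
placeholders coming from the directory-hash code:

static int example_queue_dirrem(struct inode *dir, struct inode *inode,
				struct nv_dict_entry *prevde,
				struct nv_dict_entry *de, u64 *nv_header,
				u32 seq)
{
	/* dir must be inode-locked (and, per dep_insert(), header-locked) */
	struct dep_node *dep = eufs_alloc_dep_node();

	if (!dep)
		return -ENOMEM;
	/* Fill the node and link it into dir's i_dep_list (see dep.h) */
	dep_new_insert(dep, dir, DEP_DIRREM, prevde, nv_header, de, inode, seq);
	return 0;
}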
Signed-off-by: Mingkai Dong <dongmingkai1@huawei.com>
Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Zhikang Zhang <zhangzhikang1@huawei.com>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent 5bdf3e75
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/ratelimit.h>
#include <linux/writeback.h>
#include "euler.h"
#include "dep.h"
#include "lock.h"
#include "dax.h"
#include "dht.h"
static void do_dep_diradd_oneshot(struct inode *dir_inode, struct dep_node *dep,
u64 *bitset);
struct flush_list_head {
int count;
struct llist_head head;
};
DEFINE_PER_CPU(struct flush_list_head, flush_list_percpu);
#define IFMT_HAS_ROOT(ifmt) \
((ifmt) == S_IFREG || (ifmt) == S_IFDIR || (ifmt) == S_IFLNK)
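/*
 * Spin on inode_trylock() while @enter_cond holds: on a failed trylock,
 * either bail out via @exit_expr once @exit_cond becomes true, or
 * cond_resched() and retry from @tag.
 */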
#define INODE_COND_TRYLOCK(inode, tag, enter_cond, exit_cond, exit_expr) \
do { \
tag: \
if (enter_cond) { \
if (likely(inode_trylock(inode))) { \
/* get the lock, okay */ \
} else { \
if (exit_cond) { \
exit_expr; \
} else { \
cond_resched(); \
goto tag; \
} \
} \
} \
} while (0)
static inline void fsync_dir_oneshot(struct inode *dir)
{
eufs_dir_fsync_oneshot(dir);
}
static void do_dep_dirrem(struct inode *inode, struct dep_node *dep,
u64 *bitset)
{
struct nv_dict_entry *prevde = dep->prevde;
struct nv_dict_entry *de = dep->de;
int idx;
eufs_dbg("!! %s !!", __func__);
NV_ASSERT(de);
NV_ASSERT(de->inode);
NV_ASSERT(de->name);
idx = INDEX(de->hv);
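/*
 * Mark the dict bucket holding this dentry as dirty; eufs_sync_buckets()
 * will flush it later.
 */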
bitset[idx / 64] = bitset[idx / 64] | (0x1ull << (idx & 63));
eufs_dbg("bitset-add: dict=%llx, %d %llx\n",
eufs_iread_dict(EUFS_PI(inode)), idx, bitset[idx / 64]);
/*
* This is a removal of a newly created dentry, nothing to do,
* the prevde is already manipulated in dht.c
*/
if (de->volatile_next == EUFS_DIR_DELNEW)
return;
/*
* If the dentries immediately following the deleted dentry are
* also deleted, prevde->volatile_next will be modified again.
* So if we assigned prevde->volatile_next to prevde->next here,
* those deletions would be persisted prematurely.
*/
if (prevde && !eufs_dentry_is_not_persist(prevde)) {
prevde->next = de->next;
persist_dentry(prevde);
}
}
static void do_dep_dirrem_reclaim(struct super_block *sb, struct dep_node *dep)
{
struct nv_dict_entry *de = dep->de;
struct eufs_inode __maybe_unused *pi;
struct inode *child;
pi = s2p(sb, de->inode);
child = dep->inode;
NV_ASSERT(EUFS_PI(child) == pi);
eufs_dbg("dirrem: child_inode=%px\n", child);
BUG_ON(!child);
eufs_free_name(sb, de);
nv_free(sb, de);
}
#define EUFS_PRINT_BITSET(lvl, bitset) \
eufs_##lvl("bitsets: %llx %llx %llx %llx %llx %llx %llx %llx\n", \
bitset[0], bitset[1], bitset[2], bitset[3], bitset[4], \
bitset[5], bitset[6], bitset[7])
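/*
 * Publish the volatile dict buckets recorded in @bitset into the persistent
 * dict of @vi and flush the touched cachelines. Must be called with the
 * inode header lock held.
 */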
static void eufs_sync_buckets(struct eufs_inode_info *vi, u64 bitset[8])
{
struct inode *inode = &vi->vfs_inode;
struct super_block *sb = inode->i_sb;
struct eufs_inode *pi = EUFS_FRESH_PI(EUFS_PI(inode));
struct nv_dict *dict;
int i;
/* Volatile buckets */
if (!vi->i_volatile_dict)
return;
EUFS_PRINT_BITSET(dbg, bitset);
BUG_ON(!inode_is_header_locked(inode));
dict = o2p(sb, eufs_iread_dict(pi));
for (i = 0; i < 8; ++i) {
int j;
bool dirty;
int idx;
if (!bitset[i])
continue;
dirty = false;
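/*
 * dict->table[] entries are 8-byte words, so eight consecutive
 * entries share one cacheline (assuming 64-byte cachelines);
 * flush a cacheline once all of its updated entries have been
 * written. The extra j == 64 iteration flushes the final dirty
 * cacheline.
 */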
for (j = 0; j <= 64; ++j) {
if (j % 8 == 0 && dirty) {
dirty = false;
eufs_flush_cacheline(&dict->table[idx]);
}
if (j == 64)
break;
if (!(bitset[i] & (0x1ull << j)))
continue;
idx = i * 64 + j;
eufs_dbg_dir("handle index %d (i %d, j %d) of inode=%px\n",
idx, i, j, inode);
eufs_dbg_dir(" idx=%d dict[idx]=%px vdict[idx]=%px\n",
idx, dict->table[idx],
vi->i_volatile_dict->table[idx]);
if (unlikely(vi->i_volatile_dict->table[idx] ==
EUFS_DIR_EOC_PTR))
dict->table[idx] = NULL_VAL;
else if (vi->i_volatile_dict->table[idx] != NULL)
dict->table[idx] = COMPOSE_DICT_HEAD_le64(
sb, vi->i_volatile_dict->table[idx]);
vi->i_volatile_dict->table[idx] = NULL;
dirty = true;
}
}
}
/*
* Some ideas on fast fsync (of dir):
*
* 1. Batching and coalescing. A newly inserted dentry should be marked, and
* on its removal it should be marked again, so that an unnecessary dep_diradd
* can be avoided.
*
* 2. Split! The lock (only when a single lock is needed) can be temporarily
* given up between handling two deps. This requires that the dentry pointed
* to by dir_pi is not reclaimed in the meantime (as in RCU). Combined
* with the following idea, this is quite acceptable.
*
* 3. Delayed free. The removal operations can be delayed until the locks are
* released.
*
*
* Parallel fsync for a vi has not been thoroughly considered, though.
*
* 4. Detach only if the list is empty?
*/
static void fsync_rename_inode(struct inode *dir)
{
struct eufs_inode_info *vi = EUFS_I(dir);
if (!vi->i_is_dirty)
return;
/* I'm holding the lock, so if it's dirty, it's dirty. */
fsync_dir_oneshot(dir);
}
void fsync_rename_inodes(struct inode *old_dir, struct inode *new_dir,
struct inode **locked_inodes)
{
int i;
struct inode *inode;
/*
* The two parent dirs might have had a parent-child relation at some
* point, so we need to make the locks of these two dirs transferable too.
*/
for (i = 0; i < EUFS_INODE_CNT_IN_RENAME; i++) {
inode = locked_inodes[i];
if (inode)
eufs_inode_mark_lock_transferable(inode);
}
if (old_dir == new_dir) {
fsync_rename_inode(old_dir);
} else {
fsync_rename_inode(old_dir);
fsync_rename_inode(new_dir);
}
for (i = 0; i < EUFS_INODE_CNT_IN_RENAME; i++) {
inode = locked_inodes[i];
if (inode)
eufs_inode_wait_lock_transfer_done(inode);
}
}
static void eufs_update_persisted_seq(struct eufs_inode_info *vi,
struct list_head *head)
{
if (!list_empty(head)) {
struct dep_node *dep =
list_last_entry(head, struct dep_node, node);
vi->i_persisted_dep_seq = dep->seq;
}
}
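/*
 * Background fsync of a directory, in three phases:
 *   Phase 1: repeatedly detach pending deps from i_dep_list and persist the
 *            dentries/inodes they refer to, recording dirty buckets in bitset;
 *   Phase 2: flush the dirty dict buckets via eufs_sync_buckets();
 *   Phase 3: persist the directory's own pinode, then reclaim the handled
 *            dep nodes (retrying if new deps appeared in between).
 */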
static int fsync_dir_bg(struct inode *dir)
{
struct dep_node *dep, *next;
LIST_HEAD(detached_list);
LIST_HEAD(dump_list);
int i;
#define FSYNC_DIR_VI_LOOP_NUM (20)
struct eufs_inode_info *vi = EUFS_I(dir);
struct super_block *sb = dir->i_sb;
struct eufs_sb_info *sbi = EUFS_SB(sb);
struct eufs_inode *pi = EUFS_PI(dir);
u64 bitset[8] = { 0 };
int dep_count = 0;
retry:
inode_urgent_lock(dir);
/* Phase 1 */
for (i = FSYNC_DIR_VI_LOOP_NUM; i >= 0; --i) {
/* Get all deps round by round */
if (i == 0) {
/* Last round */
inode_header_lock(dir);
}
inode_dep_lock(dir);
if (list_empty(&vi->i_dep_list) && i > 0) {
/* Skip to last round */
i = 1;
}
list_cut_position(&detached_list, &vi->i_dep_list,
vi->i_dep_list.prev);
if (i > 0)
inode_dep_unlock(dir);
/* Do dep one by one. */
list_for_each_entry_safe(dep, next, &detached_list, node) {
if (dep->type == DEP_DIRADD) {
/*
* FIXME: the lockset might be different since
* we might have released the inode lock.
*/
do_dep_diradd_oneshot(dir, dep, bitset);
} else if (dep->type == DEP_DIRREM) {
do_dep_dirrem(dir, dep, bitset);
} else
BUG();
}
list_splice_tail_init(&detached_list, &dump_list);
if (i == 0) {
eufs_pbarrier();
if (!list_empty(&dump_list))
/* Phase 2 */
eufs_sync_buckets(vi, bitset);
inode_dep_unlock(dir);
inode_header_unlock(dir);
break;
}
}
inode_urgent_unlock(dir);
/* Phase 3 */
inode_lock(dir);
if (!list_empty(&vi->i_dep_list)) {
inode_unlock(dir);
/* To handle new deps between phase 2 & 3 */
/* FIXME: Live lock possible! */
goto retry;
}
if (dir->i_nlink)
eufs_sync_pinode(dir, pi, false);
eufs_update_persisted_seq(vi, &dump_list);
vi->i_is_persisting = false;
vi->i_is_dirty = false;
if (dir->i_nlink)
persist_pinode(pi);
inode_unlock(dir);
eufs_pbarrier();
/* Reclaim memory and clear the list */
list_for_each_entry_safe(dep, next, &dump_list, node) {
struct inode *child_inode = dep->inode;
struct eufs_inode_info *child_vi = EUFS_I(child_inode);
if (dep->type == DEP_DIRREM)
do_dep_dirrem_reclaim(sb, dep);
/* remove from owner list */
spin_lock(&child_vi->i_owner_lock);
list_del_init(&dep->owner_node);
spin_unlock(&child_vi->i_owner_lock);
iput(child_inode);
list_del(&dep->node);
eufs_free_dep_node(dep);
dep_count++;
}
atomic_sub(dep_count, &sbi->s_nr_dep_nodes);
eufs_dbg("@cpu=%d !! fsync dir vi done: inode=%px\n",
smp_processor_id(), &vi->vfs_inode);
return 0;
}
static int fsync_nondir_oneshot(struct inode *inode)
{
struct eufs_inode_info *vi = EUFS_I(inode);
struct eufs_inode *pi;
/* For files other than dir */
WARN(S_ISDIR(inode->i_mode), "%s on a dir!", __func__);
/* The inode is being removed. Nothing to do. */
if (!inode->i_nlink) {
vi->i_is_dirty = false;
return 0;
}
pi = EUFS_PI(inode);
eufs_sync_pinode(inode, pi, false);
persist_pinode(pi);
vi->i_is_dirty = false;
return 0;
}
static int fsync_nondir_bg(struct inode *inode)
{
struct eufs_inode_info *vi = EUFS_I(inode);
int r;
inode_lock(inode);
r = fsync_nondir_oneshot(inode);
vi->i_is_persisting = false;
inode_unlock(inode);
return r;
}
static void fsync_bg(struct inode *inode)
{
struct eufs_sb_info *sbi = EUFS_SB(inode->i_sb);
wait_on_inode(inode);
/* Reading i_mode may need no protection */
if (S_ISDIR(inode->i_mode))
fsync_dir_bg(inode);
else
fsync_nondir_bg(inode);
/* Drop the reference taken in request_persistence() */
iput(inode);
if (atomic_dec_and_test(&sbi->s_nr_dirty_inodes) && sbi->s_draining) {
/* end of draining */
sbi->s_draining = false;
}
}
void fsync_oneshot(struct inode *inode)
{
/* Reading i_mode may need no protection */
if (S_ISDIR(inode->i_mode))
fsync_dir_oneshot(inode);
else
fsync_nondir_oneshot(inode);
}
static void do_dep_diradd_oneshot(struct inode *dir_inode, struct dep_node *dep,
u64 *bitset)
{
struct super_block *sb = dir_inode->i_sb;
struct nv_dict_entry *de = dep->de;
struct inode *inode = dep->inode;
struct eufs_inode_info *dir_vi = EUFS_I(dir_inode);
struct eufs_inode *pi;
struct eufs_inode *fresh_pi;
int idx;
void *buffer[16];
struct alloc_batch ab;
bool lock_transferred = false;
idx = INDEX(de->hv);
bitset[idx / 64] = bitset[idx / 64] | (0x1ull << (idx & 63));
if (de->volatile_next == EUFS_DIR_DELNEW) {
/*
* The de is already invisible from both the latest view and
* the consistent view.
* Will be handled in the corresponding dirrem.
*/
return;
}
/* The not-yet-persisted flag being set is the signature of a diradd */
WARN(!eufs_dentry_is_not_persist(de), "diradd wrong sign");
pi = s2p(sb, de->inode);
wait_on_inode(inode);
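/*
 * Take the child's inode lock. If the current holder has marked the
 * lock transferable (see fsync_on_draining() / fsync_rename_inodes()),
 * proceed under lock transfer instead of blocking, so we do not
 * deadlock with a caller that already holds this lock.
 */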
retry:
if (likely(inode_trylock(inode))) {
/* Got the lock */
} else {
if (eufs_inode_mark_lock_transferring(inode)) {
lock_transferred = true;
} else {
cond_resched();
goto retry;
}
}
eufs_sync_pinode(inode, pi, false);
fresh_pi = EUFS_FRESH_PI(pi);
if (!lock_transferred)
inode_unlock(inode);
else
eufs_inode_lock_transfer_done(inode);
ab.n_used = 0;
ab.size = 16;
ab.batch = buffer;
eufs_alloc_batch_add(sb, &ab, de);
/*
* Force persistence of the allocation without checking.
* TODO: differentiate the link and create syscalls so that the check
* can be applied.
*/
eufs_alloc_persist(sb, pi, true);
if (S_ISLNK(fresh_pi->i_mode)) {
void *root = o2p(sb, eufs_iread_root(fresh_pi));
/* A regular file's root is handled in the btree code. */
/* In case of a hard link, we must force persistence of the allocation. */
eufs_alloc_persist(sb, root, true);
persist_symlink(root);
} else if (S_ISDIR(fresh_pi->i_mode)) {
void *root = o2p(sb, eufs_iread_root(fresh_pi));
eufs_alloc_persist(sb, root, false);
persist_page(root);
}
persist_name(sb, de, &ab);
eufs_alloc_batch_persist_reset(sb, &ab);
persist_pinode(pi);
spin_lock(&dir_vi->i_dentry_persist_lock);
eufs_dentry_clr_not_persist_flag(de);
spin_unlock(&dir_vi->i_dentry_persist_lock);
persist_dentry(de);
}
void eufs_dir_fsync_oneshot(struct inode *dir)
{
struct dep_node *dep;
struct dep_node *next;
struct super_block *sb = dir->i_sb;
struct eufs_sb_info *sbi = EUFS_SB(sb);
struct eufs_inode_info *vi = EUFS_I(dir);
LIST_HEAD(detached_list);
u64 bitset[8] = { 0 };
int dep_count = 0;
BUG_ON(!inode_is_locked(dir));
inode_urgent_lock(dir);
/* get all deps */
inode_header_lock(dir);
inode_dep_lock(dir);
if (list_empty(&vi->i_dep_list))
goto unlock_sync_pinode;
list_for_each_entry(dep, &vi->i_dep_list, node) {
if (dep->type == DEP_DIRADD)
do_dep_diradd_oneshot(dir, dep, bitset);
else if (dep->type == DEP_DIRREM)
do_dep_dirrem(dir, dep, bitset);
else
BUG();
}
list_splice_init(&vi->i_dep_list, &detached_list);
/* sync buckets */
eufs_pbarrier();
eufs_sync_buckets(vi, bitset);
unlock_sync_pinode:
inode_dep_unlock(dir);
inode_header_unlock(dir);
/* sync pinode */
if (dir->i_nlink)
eufs_sync_pinode(dir, EUFS_PI(dir), false);
eufs_pbarrier();
eufs_update_persisted_seq(vi, &detached_list);
vi->i_is_dirty = false;
/* Reclaim memory and clear the list */
list_for_each_entry_safe(dep, next, &detached_list, node) {
struct inode *child_inode = dep->inode;
struct eufs_inode_info *child_vinode = EUFS_I(child_inode);
spin_lock(&child_vinode->i_owner_lock);
list_del_init(&dep->owner_node);
spin_unlock(&child_vinode->i_owner_lock);
if (dep->type == DEP_DIRREM) {
do_dep_dirrem_reclaim(sb, dep);
iput(dep->inode);
} else if (dep->type == DEP_DIRADD) {
iput(dep->inode);
}
list_del(&dep->node);
eufs_free_dep_node(dep);
dep_count++;
}
atomic_sub(dep_count, &sbi->s_nr_dep_nodes);
inode_urgent_unlock(dir);
}
void fsync_on_draining(struct inode *dir, struct inode *inode)
{
BUG_ON(!dir);
BUG_ON(!inode_is_locked(dir));
BUG_ON(inode && !inode_is_locked(inode));
/* for link/unlink/rmdir */
if (inode)
eufs_inode_mark_lock_transferable(inode);
fsync_dir_oneshot(dir);
if (inode)
eufs_inode_wait_lock_transfer_done(inode);
}
#define NR_FLUSH_EACH_ROUND (16)
#define FLUSH_START_THRESHOLD (64)
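/*
 * Drain the per-CPU persistee lists of the CPUs in @mask and persist every
 * inode queued on them. Returns the number of lists that had work, so the
 * caller can keep calling until everything on this node is drained.
 */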
static __always_inline int handle_persistees_for_each_cpu(
struct super_block *sb, const struct cpumask *mask, int idx) {
struct eufs_sb_info *sbi = EUFS_SB(sb);
struct llist_node *list;
struct llist_head *head;
struct eufs_inode_info *vi;
struct eufs_inode_info *next;
int n_active_list;
int cpu;
bool need;
retry:
need = sbi->need_sync[idx];
n_active_list = 0;
for_each_cpu(cpu, mask) {
head = per_cpu_ptr(sbi->persistee_list, cpu);
if (unlikely(llist_empty(head)))
continue;
n_active_list++;
list = llist_del_all(head);
eufs_dbg("persister get list %px for cpu%d\n", list, cpu);
/* reverse the ordering for better locality? */
llist_for_each_entry_safe(vi, next, list, i_persistee_node)
fsync_bg(&vi->vfs_inode);
eufs_dbg("persister handled list %px\n", list);
}
/*
* We need a complete round of processing for fssync. If
* need != sbi->need_sync[idx], need_sync was modified during our last
* round, so we retry to guarantee a complete round.
* It is okay if a cpu's dirty inodes are still being processed by
* another persister, since fssync waits for all persisters to finish.
*/
if (need != READ_ONCE(sbi->need_sync[idx]))
goto retry;
if (need) {
sbi->need_sync[idx] = false;
wake_up(&sbi->sync_wq);
}
if (READ_ONCE(sbi->need_sync[idx]))
goto retry;
return n_active_list;
}
static int persister(void *data)
{
struct super_block *sb = data;
struct eufs_sb_info *sbi = EUFS_SB(sb);
const struct cpumask *mask = cpumask_of_node(numa_node_id());
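/*
 * persist_period == 0: default, wake up every HZ/4 jiffies (1/4 second);
 * persist_period  < 0: wake up -persist_period times per second;
 * persist_period  > 0: wake up every persist_period seconds.
 */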
const int period =
(persist_period == 0) ? /* default */ (HZ / 4) :
/* less than a second */
((persist_period < 0) ? (HZ / (-persist_period)) :
/* more than a second */
(HZ * persist_period));
int idx = 0;
int num_persisters = num_sockets * persisters_per_socket;
eufs_info("sb=%px cpu=%d cpumask=%*pbl period=%d\n", data,
smp_processor_id(), cpumask_pr_args(mask), period);
while (idx < num_persisters && sbi->persisters[idx] != current)
idx++;
BUG_ON(idx >= num_persisters);
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(period);
handle_persistees_for_each_cpu(sb, mask, idx);
}
while (handle_persistees_for_each_cpu(sb, mask, idx))
cpu_relax();
eufs_info("finalizing on %d\n", smp_processor_id());
return 0;
}
int dep_init(struct super_block *sb)
{
struct eufs_sb_info *sbi = EUFS_SB(sb);
int cpu;
int i, j;
char name[BDEVNAME_SIZE];
int err;
sbi->persistee_list = alloc_percpu(struct llist_head);
if (!sbi->persistee_list) {
err = -ENOMEM;
goto cleanup;
}
/* init each llist */
for_each_possible_cpu(cpu)
init_llist_head(per_cpu_ptr(sbi->persistee_list, cpu));
sbi->persisters = kmalloc(sizeof(struct task_struct *) *
persisters_per_socket * num_sockets,
GFP_KERNEL);
if (!sbi->persisters) {
err = -ENOMEM;
goto cleanup;
}
sbi->need_sync = kzalloc(
sizeof(bool) * persisters_per_socket * num_sockets, GFP_KERNEL);
if (!sbi->need_sync) {
err = -ENOMEM;
goto cleanup;
}
init_waitqueue_head(&sbi->sync_wq);
bdevname(sb->s_bdev, name);
for (i = 0; i < num_sockets; ++i) {
for (j = 0; j < persisters_per_socket; ++j) {
int idx = i * persisters_per_socket + j;
sbi->persisters[idx] = kthread_create_on_node(
persister, sb, i, "hmfs/%s-%d.%d", name, i, j);
if (IS_ERR(sbi->persisters[idx])) {
err = PTR_ERR(sbi->persisters[idx]);
pr_err("create persister %s-%d.%d error %d",
name, i, j, err);
sbi->persisters[idx] = NULL;
goto cleanup;
}
set_cpus_allowed_ptr(sbi->persisters[idx],
cpumask_of_node(i));
wake_up_process(sbi->persisters[idx]);
}
}
return 0;
cleanup:
dep_fini(sb);
return err;
}
void dep_fini(struct super_block *sb)
{
struct eufs_sb_info *sbi = EUFS_SB(sb);
if (sbi->persisters) {
int i;
for (i = 0; i < persisters_per_socket * num_sockets; ++i) {
if (sbi->persisters[i]) {
kthread_stop(sbi->persisters[i]);
sbi->persisters[i] = NULL;
}
}
kfree(sbi->persisters);
sbi->persisters = NULL;
}
kfree(sbi->need_sync);
sbi->need_sync = NULL;
free_percpu(sbi->persistee_list);
sbi->persistee_list = NULL;
}
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef EUFS_DEP_H
#define EUFS_DEP_H
#include <linux/llist.h>
#include <linux/list.h>
#include <linux/fs.h>
#include "euler.h"
#include "alloc_interface.h"
/**
* Dep type:
* - diradd (for create/symlink/link/mknod)
* - dirrem
*/
enum fsync_type {
FSYNC_DEP,
FSYNC_RENAME,
FSYNC_SYSCALL,
};
extern int disable_persisters;
extern int persist_period;
extern int persisters_per_socket;
#define eufs_dep_seq_after(a, b) ((s32)((b) - (a)) < 0)
#define eufs_dep_seq_after_eq(a, b) ((s32)((a) - (b)) >= 0)
void eufs_dir_fsync_oneshot(struct inode *dir);
void fsync_on_draining(struct inode *dir, struct inode *inode);
void fsync_rename_inodes(struct inode *old_inode, struct inode *new_inode,
struct inode **locked_inodes);
void fsync_oneshot(struct inode *inode);
enum dep_type {
DEP_DIRADD, /* Hard link is detected by checking inode->i_nlink */
DEP_DIRREM,
DEP_TYPE_COUNT,
};
struct dep_node {
struct list_head node;
struct list_head owner_node;
u32 seq;
/* Type of the dependency */
enum dep_type type;
/* Previous dentry */
struct nv_dict_entry *prevde;
/* header of the list */
u64 *nv_header;
/* Related Dentry, which also points to an inode */
struct nv_dict_entry __pmem *de;
/* inode for de->pi */
struct inode *inode;
struct inode *dir;
} __aligned(CACHELINE_SIZE);
int dep_init(struct super_block *sb);
void dep_fini(struct super_block *sb);
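/*
 * Queue @inode on the calling CPU's persistee list so that a background
 * persister will pick it up. Flips s_draining once too many inodes are
 * dirty. The caller must hold the inode lock; a reference is taken here
 * and dropped by the persister after the inode has been persisted.
 */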
static __always_inline void request_persistence(struct inode *inode)
{
struct eufs_sb_info *sbi = EUFS_SB(inode->i_sb);
struct eufs_inode_info *vi = EUFS_I(inode);
int cpu;
BUG_ON(!inode_is_locked(inode));
if (!vi->i_is_dirty)
vi->i_is_dirty = true;
if (vi->i_is_persisting)
return;
cpu = get_cpu();
llist_add(&vi->i_persistee_node, per_cpu_ptr(sbi->persistee_list, cpu));
put_cpu();
eufs_dbg_vlimit("sbi->s_nr_dirty_inodes=%d ++ vi=%px @cpu=%d\n",
atomic_read(&sbi->s_nr_dirty_inodes), vi, cpu);
if (atomic_inc_return(&sbi->s_nr_dirty_inodes) > max_dirty_inodes &&
!sbi->s_draining)
sbi->s_draining = true;
vi->i_is_persisting = true;
ihold(inode);
}
/* precondition: dir inode is mutex-locked */
static __always_inline void dep_insert(struct inode *dir, struct dep_node *dep)
{
struct eufs_inode_info *dir_vi = EUFS_I(dir);
struct eufs_inode_info *child_vi = EUFS_I(dep->inode);
struct eufs_sb_info *sbi = EUFS_SB(dir->i_sb);
inode_dep_lock(dir);
inode_header_unlock(dir);
list_add_tail(&dep->node, &dir_vi->i_dep_list);
spin_lock(&child_vi->i_owner_lock);
list_add_tail(&dep->owner_node, &child_vi->i_owner_list);
spin_unlock(&child_vi->i_owner_lock);
inode_dep_unlock(dir);
eufs_dbg_vlimit("sbi->s_nr_dep_nodes=%d ++\n",
atomic_read(&sbi->s_nr_dep_nodes));
if (atomic_inc_return(&sbi->s_nr_dep_nodes) > max_dep_nodes &&
!sbi->s_draining) {
sbi->s_draining = true;
}
/* Request persistence of the directory */
request_persistence(dir);
}
static __always_inline bool eufs_valid_inode_in_de(struct nv_dict_entry *de,
struct inode *inode)
{
return (le64_to_cpu(de->inode) == inode->i_ino);
}
static __always_inline void
dep_new_insert(struct dep_node *dep, struct inode *dir, enum dep_type type,
struct nv_dict_entry *prevde, u64 *nv_header,
struct nv_dict_entry *de, struct inode *inode, u32 seq)
{
dep->type = type;
dep->prevde = prevde;
dep->nv_header = nv_header;
dep->de = de;
dep->inode = inode;
dep->dir = dir;
dep->seq = seq;
NV_ASSERT(eufs_valid_inode_in_de(dep->de, dep->inode));
ihold(dep->inode);
dep_insert(dir, dep);
}
static __always_inline void persist_dentry(struct nv_dict_entry *de)
{
NV_ASSERT(de);
NV_ASSERT((u64)de % CACHELINE_SIZE == 0);
NV_ASSERT(sizeof(*de) <= CACHELINE_SIZE);
eufs_flush_cacheline(de);
}
static __always_inline void persist_pinode(struct eufs_inode *pi)
{
WARN_ON(!EUFS_IS_HEAD_PI(pi));
NV_ASSERT(pi);
NV_ASSERT((u64)pi % CACHELINE_SIZE == 0);
NV_ASSERT(sizeof(*pi) <= EUFS_INODE_SIZE);
eufs_flush_cacheline(EUFS_FRESH_PI(pi));
eufs_flush_cacheline(&EUFS_FRESH_PI(pi)->i_fresh);
}
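/*
 * Flush the out-of-line portion of a dentry's name: names longer than
 * FIRST_LEN spill into a chain of nv_name_ext blocks; each extension
 * block is added to the allocation batch @ab and its cacheline flushed.
 */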
static __always_inline void persist_name(struct super_block *sb,
const struct nv_dict_entry *de,
struct alloc_batch *ab)
{
size_t len = HASHLEN_LEN(de->hv);
struct nv_name_ext *next;
const char *name;
if (likely(len <= FIRST_LEN)) {
/* embedded in de */
return;
}
next = s2p(sb, de->nextname);
len -= FIRST_LEN;
name = next->name;
eufs_alloc_batch_add(sb, ab, (void *)name);
while (len > FOLLOW_LEN) {
next = s2p(sb, next->nextname);
eufs_flush_cacheline(name);
len -= FOLLOW_LEN;
name = next->name;
eufs_alloc_batch_add(sb, ab, (void *)name);
}
eufs_flush_cacheline(name);
}
static __always_inline void persist_symlink(void *root)
{
u64 len;
NV_ASSERT(root);
NV_ASSERT(((u64)root) % PAGE_SIZE == 0);
len = EUFS_SYMLINK_HASHLEN_LEN(*((u64 *)root));
NV_ASSERT(len <= EUFS_MAX_SYMLINK_LEN);
BUG_ON(len > EUFS_MAX_SYMLINK_LEN);
eufs_flush_range(root, EUFS_SYMLINK_SIZE(len));
}
static __always_inline void persist_page(const char *page)
{
NV_ASSERT(page);
NV_ASSERT(((u64)page) % PAGE_SIZE == 0);
eufs_flush_page(page);
}
#endif /* EUFS_DEP_H */