Commit 4b22469e authored by Yu Kuai, committed by Zheng Zengkai

eulerfs: common definitions

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I40JRR
CVE: NA

--------------------------------------

These interfaces will be implemented and used in later patches.
Signed-off-by: Mingkai Dong <dongmingkai1@huawei.com>
Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Parent 22f7a4bf
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef EUFS_CONST_H
#define EUFS_CONST_H
/* EULER */
#define EUFS_SUPER_MAGIC 0x50C9
/* Mount flags */
#define EUFS_MOUNT_ERRORS_RO 0x000001 /* Remount fs ro on errors */
#define EUFS_MOUNT_ERRORS_PANIC 0x000002 /* Panic on errors */
#define EUFS_MOUNT_FORMAT 0x000004 /* was FS formatted on mount? */
#define NULL_ADDR ((u64)-1ll)
#define NULL_VAL (0)
#define NULL_ADDR_PTR ((void *)(NULL_ADDR))
/* FS Limits */
#define EUFS_MAX_NAME_LEN (255)
#define EUFS_LINK_MAX (32000) /* max links to a file */
/* layout: hash_len (u64) + sym_link + trailing zero */
#define EUFS_MAX_SYMLINK_LEN (PAGE_SIZE - sizeof(u64) - 1)
#define EUFS_SYMLINK_HASHLEN_LEN(hashlen) (((hashlen) >> 48) & 0xfff)
#define EUFS_SYMLINK_SIZE(len) ((len) + sizeof(u64) + 1)
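/*
 * Worked example (illustrative, not part of the original patch): a
 * 10-byte symlink target occupies EUFS_SYMLINK_SIZE(10) = 10 + 8 + 1
 * = 19 bytes: the u64 hash_len, the target bytes, and a trailing zero.
 */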
#define EUFS_BLOCK_SIZE (4096)
#define EUFS_BLOCK_SIZE_BITS (12)
/* The initial height is 0 when the file tree contains zero or one block */
#define EUFS_MAX_FILE_TREE_HEIGHT 3
#define EUFS_FILE_TREE_DEGREE_SHIFT 9
#define EUFS_FILE_TREE_DEGREE (1U << EUFS_FILE_TREE_DEGREE_SHIFT)
#define EUFS_MAX_FILE_BLK_CNT \
(1ll << (EUFS_MAX_FILE_TREE_HEIGHT * EUFS_FILE_TREE_DEGREE_SHIFT))
#define EUFS_MAX_FILE_SIZE (4096ll * EUFS_MAX_FILE_BLK_CNT)
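/*
 * Worked example (illustrative): with height 3 and degree 512,
 * EUFS_MAX_FILE_BLK_CNT = 512^3 = 134,217,728 blocks, so
 * EUFS_MAX_FILE_SIZE = 4096 * 512^3 bytes = 512 GiB.
 */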
#define EUFS_POISON_POINTER ((void *)0x1010101010101010UL)
#define EUFS_POISON_VALUE ((u64)0x1010101010101010UL)
#define CACHELINE_SIZE (64)
#define EUFS_ALLOC_BLOCKS_ZERO_NONE (0x0) /* Zero none of the NULL_ADDR pages */
#define EUFS_ALLOC_BLOCKS_ZERO_ALL (0x1) /* Zero all of the NULL_ADDR pages */
#define EUFS_ALLOC_BLOCKS_ZERO_EDGE (0x2) /* Zero only the edge NULL_ADDR pages */
#define EUFS_INODE_SIZE (CACHELINE_SIZE * 2)
#define NV_DICT_CAPACITY (512ULL)
/*
* EOC stands for "End Of Chain".
*
* When the volatile bucket (namely table[idx]) is EUFS_DIR_EOC_PTR,
* both the volatile bucket and the persistent bucket are empty.
* When the volatile bucket is NULL, only the volatile bucket is
* known to be empty.
*
* When volatile_next is EUFS_DIR_EOC, the current entry is the
* last one in the chain, although its next may still point to an
* entry (because the setting and persistence of next are deferred).
* When volatile_next is NULL, next must be checked to determine
* whether the current entry is the last one in the chain.
*/
#define EUFS_DIR_EOC ((u64)-1)
#define EUFS_DIR_EOC_PTR ((void *)EUFS_DIR_EOC)
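/*
 * A minimal sketch of how a reader interprets the chain markers
 * (hypothetical helper, for illustration only; it mirrors the rules
 * described in the comment above):
 *
 *	static bool chain_ends_here(u64 volatile_next, u64 next)
 *	{
 *		if (volatile_next == EUFS_DIR_EOC)
 *			return true;            (definitely the last entry)
 *		if (volatile_next != NULL_VAL)
 *			return false;           (a volatile successor exists)
 *		return next == NULL_VAL;        (fall back to persistent next)
 *	}
 */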
/* DIR DELeted NEW dentry */
#define EUFS_DIR_DELNEW ((u64)0x3030303030303030UL)
#endif /* EUFS_CONST_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef EUFS_H
#define EUFS_H
#include <linux/crc16.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
#include <linux/version.h>
#include <linux/pagemap.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/uio.h>
#include <linux/mutex.h>
#include <linux/slab.h>
/* annotation for pointer to persistent memory */
#define __pmem
#define NV_CHECK (0)
#if NV_CHECK
#pragma message "NV CHECK IS TURNED ON! NO PERF. EVAL.!"
#endif
#if NV_CHECK
#define NV_ASSERT(x) \
do { \
if (!(x)) { \
eufs_warn("assertion failed %s:%d: %s\n", __FILE__, \
__LINE__, #x); \
} \
WARN(!(x), "detail:"); \
} while (0)
#else
#define NV_ASSERT(x)
#endif
#include "const.h"
#include "euler_dbg.h"
#include "nvm_struct.h"
#include "euler_def.h"
#include "kmem_cache.h"
#include "flush.h"
#include "euler_common.h"
#include "inode.h"
#include "nvalloc.h"
extern int num_sockets;
/* Function Prototypes */
extern __printf(2, 3) void eufs_error_mng(struct super_block *sb,
const char *fmt, ...);
/* dir.c */
extern const struct file_operations eufs_dir_operations;
/* file.c */
extern const struct inode_operations eufs_file_inode_operations;
extern const struct file_operations eufs_file_operations;
int eufs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
/* inode.c */
extern const struct address_space_operations eufs_aops;
/* namei.c */
extern const struct inode_operations eufs_dir_inode_operations;
extern const struct inode_operations eufs_special_inode_operations;
/* symlink.c */
extern const struct inode_operations eufs_symlink_inode_operations;
#endif /* EUFS_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef EUFS_COMMON_H
#define EUFS_COMMON_H
#include <linux/crc16.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
#include <linux/pagemap.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/uio.h>
#include <linux/mutex.h>
#include <linux/version.h>
#include <linux/slab.h>
#include <linux/fs.h>
#ifndef EUFS_H
#error "Please include euler_common.h by including euler.h"
#endif
#define EUFS_INODE_CNT_IN_RENAME 4
#define PAGE_DIV_ROUND_UP(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PAGE_DIV_ROUND_DOWN(x) (((x)) >> PAGE_SHIFT)
#define clear_opt(o, opt) (o &= ~EUFS_MOUNT_##opt)
#define set_opt(o, opt) (o |= EUFS_MOUNT_##opt)
#define test_opt(sb, opt) (EUFS_SB(sb)->s_mount_opt & EUFS_MOUNT_##opt)
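/*
 * Usage sketch (illustrative): reacting to the errors=remount-ro
 * option. The opt argument is expanded with the EUFS_MOUNT_ prefix.
 *
 *	set_opt(sbi->s_mount_opt, ERRORS_RO);
 *	if (test_opt(sb, ERRORS_RO))
 *		sb->s_flags |= SB_RDONLY;
 */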
static __always_inline void *o2p(struct super_block *sb, u64 offset);
static __always_inline u64 p2o(struct super_block *sb, void *ptr);
static __always_inline struct eufs_sb_info *EUFS_SB(struct super_block *sb)
{
return sb->s_fs_info;
}
static __always_inline struct eufs_inode_info *EUFS_I(struct inode *inode)
{
return container_of(inode, struct eufs_inode_info, vfs_inode);
}
static __always_inline struct eufs_inode *EUFS_PI(struct inode *inode)
{
return (struct eufs_inode *)o2p(inode->i_sb, inode->i_ino);
}
static __always_inline unsigned long eufs_pi2ino(struct super_block *sb,
struct eufs_inode *pi)
{
return p2o(sb, EUFS_HEAD_PI(pi));
}
static __always_inline struct eufs_super_block *
eufs_get_super(struct super_block *sb)
{
struct eufs_sb_info *sbi = EUFS_SB(sb);
return (struct eufs_super_block *)sbi->virt_addr;
}
static __always_inline void *eufs_get_renamej(struct super_block *sb, int cpu)
{
struct eufs_sb_info *sbi = EUFS_SB(sb);
return (void *)((u64)sbi->renamej + EUFS_RENAMEJ_ENTRY_SIZE * cpu);
}
/*
* o: offset: u64
* p: pointer: void *
* s: storage: __le64
*/
static __always_inline void *o2p(struct super_block *sb, u64 offset)
{
if (offset == 0)
return NULL;
if (offset == -1)
return (void *)-1;
return (void *)(EUFS_SB(sb)->virt_addr + offset);
}
static __always_inline u64 p2o(struct super_block *sb, void *ptr)
{
if (ptr == NULL)
return 0;
if (ptr == (void *)-1)
return -1;
return (u64)(ptr - EUFS_SB(sb)->virt_addr);
}
/* pointer to storage */
static __always_inline __le64 p2s(struct super_block *sb, void *ptr)
{
return cpu_to_le64(p2o(sb, ptr));
}
/* storage to pointer */
static __always_inline void *s2p(struct super_block *sb, __le64 s)
{
return o2p(sb, le64_to_cpu(s));
}
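/*
 * Conversion sketch (illustrative): persisting a pointer field and
 * reading it back through the offset-based representation.
 *
 *	de->nextname = p2s(sb, name_ext);               (pointer -> on-media __le64)
 *	struct nv_name_ext *p = s2p(sb, de->nextname);  (and back again)
 */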
static __always_inline bool
eufs_access_ok(struct super_block *sb, const void *pointer, unsigned long sz)
{
return true;
}
#define eufs_ptr_fast_check_b(ptr) 0
#define eufs_ptr_fast_check(ptr) BUG_ON(eufs_ptr_fast_check_b(ptr))
#define HASHLEN_LEN(hashlen) (((hashlen) >> 48) & 0xff)
static __always_inline hashlen_t hash(const char *name, size_t len)
{
static const int seed = 131;
u64 r = 0;
int i;
for (i = 0; i < len; ++i)
r = r * seed + (int)name[i];
return (u64)len << 48 | (r & 0xffffffffffff);
}
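/*
 * Worked example (illustrative): hash("ab", 2) computes
 * r = 'a' * 131 + 'b' = 97 * 131 + 98 = 12805 and returns
 * (2 << 48) | 12805, i.e. the length in the top 16 bits and the
 * 48-bit hash value below it.
 */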
static __always_inline bool key_equals(struct super_block *sb, const char *key,
hashlen_t hashlen,
const struct nv_dict_entry *de)
{
int len;
struct nv_name_ext *p;
NV_ASSERT(key);
NV_ASSERT(hashlen);
if (hashlen != de->hv)
return false;
len = HASHLEN_LEN(hashlen);
if (likely(len <= FIRST_LEN))
return memcmp(de->name, key, len) == 0;
if (memcmp(de->name, key, FIRST_LEN))
return false;
eufs_dbg("first len ok\n");
len -= FIRST_LEN;
p = s2p(sb, de->nextname);
key += FIRST_LEN;
while (len > FOLLOW_LEN) {
eufs_dbg("check again p:%*s key:%*s\n", (int)FOLLOW_LEN,
p->name, (int)FOLLOW_LEN, key);
if (memcmp(p->name, key, FOLLOW_LEN))
return false;
p = s2p(sb, p->nextname);
key += FOLLOW_LEN;
len -= FOLLOW_LEN;
}
eufs_dbg("final check name p:%*s key:%*s\n", len, p->name, len, key);
return !memcmp(p->name, key, len);
}
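/*
 * Length math (illustrative): with 64-byte cache lines, FIRST_LEN is
 * 24 and FOLLOW_LEN is 56 (see nvm_struct.h), so a 100-byte name is
 * compared as 24 bytes inline in the dentry plus two nv_name_ext
 * extents of 56 and 20 bytes.
 */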
static __always_inline void eufs_flush_pi(struct eufs_inode *pi)
{
eufs_flush_cacheline(pi);
eufs_flush_cacheline(&pi->i_fresh);
}
static __always_inline void inode_dep_lock(struct inode *inode)
{
mutex_lock(&EUFS_I(inode)->i_dep_lock);
}
static __always_inline void inode_dep_unlock(struct inode *inode)
{
mutex_unlock(&EUFS_I(inode)->i_dep_lock);
}
static __always_inline int inode_is_dep_locked(struct inode *inode)
{
return mutex_is_locked(&EUFS_I(inode)->i_dep_lock);
}
static __always_inline void inode_header_lock(struct inode *inode)
{
mutex_lock(&EUFS_I(inode)->i_header_lock);
}
static __always_inline void inode_header_unlock(struct inode *inode)
{
mutex_unlock(&EUFS_I(inode)->i_header_lock);
}
static __always_inline int inode_is_header_locked(struct inode *inode)
{
return mutex_is_locked(&EUFS_I(inode)->i_header_lock);
}
static __always_inline void inode_urgent_lock(struct inode *inode)
{
mutex_lock(&EUFS_I(inode)->i_urgent_mutex);
}
static __always_inline void inode_urgent_unlock(struct inode *inode)
{
mutex_unlock(&EUFS_I(inode)->i_urgent_mutex);
}
static __always_inline int inode_is_urgent_locked(struct inode *inode)
{
return mutex_is_locked(&EUFS_I(inode)->i_urgent_mutex);
}
static __always_inline void inode_leaf_lock(struct inode *inode)
{
mutex_lock(&EUFS_I(inode)->i_leaf_lock);
}
static __always_inline void inode_leaf_unlock(struct inode *inode)
{
mutex_unlock(&EUFS_I(inode)->i_leaf_lock);
}
#endif /* EUFS_COMMON_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef EUFS_DBG_H
#define EUFS_DBG_H
/*
* Debug code
*/
#ifdef pr_fmt
#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#endif
#define eufs_dbg(s, args...)
#define eufs_dbg_vlimit(s, args...)
#define eufs_dbg_dir(s, args...)
#define eufs_crit(s, args...) pr_crit(s, ##args)
#define eufs_err(sb, s, args...) eufs_error_mng(sb, s, ##args)
#define eufs_warn(s, args...) pr_warn(s, ##args)
#define eufs_info(s, args...) \
pr_info("(pid=%d,cmd=%s) " s, current->pid, current->comm, ##args)
#endif /* EUFS_DBG_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef EUFS_DEF_H
#define EUFS_DEF_H
#ifndef EUFS_H
#error "Do not include euler_def.h directly. Include euler.h instead."
#endif
#include <linux/cpufeature.h>
#include <linux/processor.h>
#include <linux/types.h>
#include <linux/magic.h>
#include <linux/delay.h>
struct alloc_batch {
/* both in slots */
long size;
long n_used;
void **batch;
long n_pending;
struct list_head list;
};
struct v_dict;
enum { I_TRANS_NONE = 0, I_TRANS_AVAIL, I_TRANS_LOCKED };
struct eufs_inode_info {
struct list_head i_dep_list; /* A list of struct op_node to persist */
/* protect operations on i_dep_list */
struct mutex i_dep_lock;
struct llist_node i_persistee_node;
u32 i_next_dep_seq;
u32 i_persisted_dep_seq;
spinlock_t i_owner_lock;
struct list_head i_owner_list;
/* regular file: pmem pointer */
void __pmem *i_volatile_root;
struct v_dict *i_volatile_dict;
/*
* serialize the insertion of dependency nodes into the same
* directory by different processes or CPUs
*/
struct mutex i_header_lock;
struct mutex i_urgent_mutex;
int i_volatile_height;
u64 i_volatile_tree_blocks;
u64 i_dotdot;
/*
* an inode can only be added to a persistence list once;
* i_is_persisting and inode_lock together ensure that.
*/
bool i_is_persisting;
/* whether the inode needs persistence */
bool i_is_dirty;
int i_lock_transferred;
bool hole_at_sta; /* the 0th data block is a hole */
u64 i_ext;
u16 i_version;
struct alloc_batch page_batch;
/* serialize mmap with truncate/fallocate/write/unlink */
struct rw_semaphore mmap_rwsem;
/* Protect pointers to leaf nodes (data pages) */
struct mutex i_leaf_lock;
spinlock_t i_dentry_persist_lock;
struct inode vfs_inode;
};
typedef u8 page_info_t;
struct page_wear;
/*
* EulerFS super-block data in memory
*/
struct eufs_sb_info {
struct block_device *s_bdev;
struct dax_device *s_dax_dev;
phys_addr_t phys_addr;
void __pmem *virt_addr;
struct vm_struct *vm;
unsigned long block_start;
unsigned long block_end;
void __pmem *renamej;
u64 s_crash_ver;
/* protects the SB's buffer-head */
struct mutex s_lock;
unsigned long blocksize;
unsigned long initsize;
unsigned long s_mount_opt;
atomic_t next_generation;
/* Begin of Allocator */
/* DRAM pools:
* - a single global pool
* - protected by page_lock and line_lock
* - a local pool per cpu
* - allocate/free from global pool in batch
* - no locks needed
* - a single (global) rest pool
* - when a page is used too many times, it is put into rest pool
* - cache lines are never put in rest pool
*/
spinlock_t large_lock;
spinlock_t page_lock;
spinlock_t line_lock;
struct mem_pool *gpool;
struct mem_pool *ppool; /* percpu variable */
spinlock_t rest_lock;
struct mem_pool *rest_pool;
page_info_t __pmem *page_map;
void __pmem *data_start;
u64 npages;
/* Other DRAM structures for the allocator:
*
* - struct ptr_list_node: a unit of allocation (i.e., a page
* or a cacheline).
*
* - cached nodes: preallocated ptr_list_node for all pages, indexed by
* the page number. If the page is free, its ptr_list_node should
* be in some mem_pool.
*
* - line_node_ptrs: preallocated pointers for all pages. For each
* page, the pointer may point to an array of
* (PAGE_SIZE/CACHELINE_SIZE) ptr_list_nodes, each of which represents
* the allocation status of the corresponding cache line in the page.
* The array is dynamically allocated for memory conservation.
*
* - line_indicators: preallocated u8s for all pages. Each of the u8s
* records the number of cache lines available in the global pool. This
* is used for cacheline coalescence.
*
* - page_wears: preallocated ints for all pages. Each of the ints
* records the number of writes to the page. This is used as a
* coarse-grained indicator of wear.
*
*/
struct ptr_list_node *cached_nodes;
struct ptr_list_node **line_node_ptrs;
u8 *line_indicators; /* Number of lines used per page! */
struct page_wear *page_wears;
/* End of Allocator */
/* Begin of Persister */
/* the kmem cache for dep_node is defined globally in super.c */
struct llist_head *persistee_list; /* percpu variable */
struct task_struct **persisters;
bool *need_sync; /* for fssync */
wait_queue_head_t sync_wq; /* for fssync's thread */
struct mutex sync_mutex; /* serialize fssync request */
/* End of Persister */
/* The word `draining` is reserved for volatility quota limitation */
bool s_draining;
wait_queue_head_t s_draining_wq;
atomic_t s_nr_dirty_inodes;
atomic_t s_nr_dep_nodes;
struct mutex gather_mutex;
};
struct dir_scan_data {
struct super_block *sb;
struct dir_context *ctx;
};
typedef u64 hashlen_t;
#endif /* EUFS_DEF_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef EUFS_NVM_STRUCT_H
#define EUFS_NVM_STRUCT_H
#define EUFS_SB_SIZE 512
#define EUFS_SB2_OFFSET 512
#define EUFS_SB_PADDING (1024 * 2)
/* Used by rename journal */
#define EUFS_MAX_CPU_CNT 128
#define EUFS_RENAMEJ_ENTRY_SIZE (1024)
#define EUFS_RENAMEJ_SIZE (EUFS_MAX_CPU_CNT * EUFS_RENAMEJ_ENTRY_SIZE)
#define EUFS_RENAMEJ_OFFSET (EUFS_SB_SIZE * 2 + EUFS_SB_PADDING)
#define EUFS_CRC_SEED (~0)
#define EUFS_RENAME_IN_ACTION 1
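/*
 * Offset check (illustrative, derived from the macros above):
 * EUFS_RENAMEJ_OFFSET = 512 * 2 + 2048 = 3072 bytes from the start of
 * the device, and CPU n's rename-journal entry lives at
 * renamej + n * EUFS_RENAMEJ_ENTRY_SIZE (see eufs_get_renamej()).
 */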
/*
* Layout
* +------------------------+
* | Super Block | 64B
* +------------------------+
* | Padding | 512B-64B
* +------------------------+
* | Secondary Super Block | 64B
* +------------------------+
* | Padding | Aligned to 4K
* +------------------------+
*
* +------------------------+
* | |
* | pages |
* | |
* +------------------------+
* | bitmap for pages | 4K-aligned
* +------------------------+
* | Rename-Journals | 128K (128 cores * 1024B/core)
* +------------------------+
* | |
* | pages |
* | |
* +------------------------+
*/
/*
* Structure of the EulerFS super block.
*/
struct eufs_super_block {
/* checksum of this sb */
__le16 s_sum;
/* magic signature */
__le16 s_magic;
char s_safe_umount;
char s_flag;
__le16 s_fs_version;
/* 8 Bytes */
/* total size of fs in bytes */
__le64 s_size;
/* base virtual address used in fs */
__le64 s_virt_addr;
/* 24 Bytes */
char s_volume_name[16];
/* 40 Bytes */
/* points to the location of mini-journal and rename journal */
__le64 s_page_map;
/* 48 Bytes */
/*
* s_mtime (mount time) and s_wtime (write time) must be kept adjacent
* and in this order: we use a single 8-byte write to update both of
* them atomically.
*/
__le32 s_mtime;
__le32 s_wtime;
/* 56 Bytes */
__le64 s_root_pi;
/* 64 Bytes */
__le64 s_crash_ver;
};
/* ========== directory & hash ========== */
#define FIRST_LEN (CACHELINE_SIZE - sizeof(__le64) * 5)
#define FOLLOW_LEN (CACHELINE_SIZE - sizeof(__le64))
typedef u64 hashlen_t;
struct nv_dict {
__le64 __pmem table[NV_DICT_CAPACITY]; /* <struct nv_dict_entry *> */
} __aligned(PAGE_SIZE);
struct nv_dict_entry {
/* half a cache line (8B * 4) size in total */
__le64 inode; /* <struct eufs_inode *> */
__le64 next; /* <struct nv_dict_entry *> */
__le64 volatile_next; /* <struct nv_dict_entry *> */
/* store some filename */
__le64 hv; /* <hashlen_t> hashlen */
__le64 nextname; /* <char *> */
char name[FIRST_LEN];
} __aligned(CACHELINE_SIZE);
struct nv_name_ext {
char name[FOLLOW_LEN];
__le64 nextname;
} __aligned(CACHELINE_SIZE);
#define EUFS_IS_HEAD_PI(pi) (!((u64)(pi) & (0x100 - 1)))
#define EUFS_TWIN_PI(pi) \
(EUFS_IS_HEAD_PI(pi) ? (((struct eufs_inode *)(pi)) + 1) : \
(((struct eufs_inode *)(pi)) - 1))
#define EUFS_FRESH_PI(pi) \
(((pi)->i_fresh >= EUFS_TWIN_PI(pi)->i_fresh) ? (pi) : \
EUFS_TWIN_PI(pi))
#define EUFS_HEAD_PI(pi) (EUFS_IS_HEAD_PI(pi) ? (pi) : EUFS_TWIN_PI(pi))
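/*
 * Example (illustrative): twin inodes share one 256-byte slot, so the
 * head pi is the 256-byte-aligned one. For a head pi at offset 0x1000,
 * EUFS_TWIN_PI(pi) is at 0x1080 (one EUFS_INODE_SIZE further), and
 * EUFS_FRESH_PI() picks whichever twin has the higher i_fresh.
 */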
/* ========== inode ========== */
struct eufs_inode {
/* Cacheline 1: read-mostly part */
/* 0 ~ 8 */
__le32 i_flags; /* Inode flags */
__le16 i_mode; /* File mode */
__le16 i_version; /* Inode version */
/* 8 ~ 16 */
/* Note: the ctime to report is max(i_ctime, i_mtime) */
__le64 i_ctime; /* Inode modification time (only for metadata) */
/* 16 ~ 24 */
__le32 i_uid; /* Owner Uid */
__le32 i_gid; /* Group Id */
/* 24 ~ 32 */
__le64 i_dotdot; /* <struct eufs_inode *> parent inode (dir only) */
/* 32 ~ 40 */
__le64 i_ext; /* reserved for extension */
/* 40 ~ 48 */
__le32 i_ctime_nsec; /* nano sec */
/* 48 ~ 56 */
__le64 padding1;
/* 56 ~ 64 */
__le64 padding2;
/* Cacheline 2: read-write part */
/* 0 ~ 8 */
__le32 i_generation; /* File version (for NFS) */
__le16 i_nlink; /* Links count */
/*
* Freshness: we have twin-inodes here. When we access an inode,
* we compare the freshness of the two inodes and use the one with
* higher freshness. The freshness is only 16-bit, but we can easily
* handle the overflow.
*/
__le16 i_fresh; /* Freshness of the inode */
/* 8 ~ 16 */
__le64 i_mtime; /* Inode b-tree Modification time */
/* 16 ~ 24 */
__le64 i_atime; /* Access time */
/* 24 ~ 32 */
union {
__le64 i_root; /* btree root (regular only) */
__le64 i_dict; /* dict root (dir only) */
__le32 i_rdev; /* major/minor (device only) */
};
/* 32 ~ 40 */
/*
* Size:
* for directory: number of entries inside
* for regular: number of bytes stored
* others: not used
*/
__le64 i_size; /* Size of data in bytes */
/* 40 ~ 48 */
__le64 i_tree_blocks; /* #blocks allocated in btree (regular only) */
/* 48 ~ 56 */
__le32 i_mtime_nsec; /* nano sec */
__le32 i_atime_nsec; /* nano sec */
/* 56 ~ 64 */
__le64 padding3;
} __aligned(CACHELINE_SIZE);
#define eufs_iread_flags(i) (le32_to_cpu((i)->i_flags))
#define eufs_iread_mode(i) (le16_to_cpu((i)->i_mode))
#define eufs_iread_ctime(i) (le64_to_cpu((i)->i_ctime))
#define eufs_iread_uid(i) (le32_to_cpu((i)->i_uid))
#define eufs_iread_gid(i) (le32_to_cpu((i)->i_gid))
#define eufs_iread_dotdot(i) (le64_to_cpu((i)->i_dotdot))
#define eufs_iwrite_flags(i, v) ((i)->i_flags = cpu_to_le32(v))
#define eufs_iwrite_mode(i, v) ((i)->i_mode = cpu_to_le16(v))
#define eufs_iwrite_ctime(i, v) ((i)->i_ctime = cpu_to_le64(v))
#define eufs_iwrite_uid(i, v) ((i)->i_uid = cpu_to_le32(v))
#define eufs_iwrite_gid(i, v) ((i)->i_gid = cpu_to_le32(v))
#define eufs_iwrite_dotdot(i, v) ((i)->i_dotdot = cpu_to_le64(v))
#define eufs_iread_version(i) (le16_to_cpu((i)->i_version))
#define eufs_iread_ctime_nsec(i) (le32_to_cpu((i)->i_ctime_nsec))
#define eufs_iread_ext(i) (le64_to_cpu((i)->i_ext))
#define eufs_iwrite_version(i, v) ((i)->i_version = cpu_to_le16(v))
#define eufs_iwrite_ctime_nsec(i, v) ((i)->i_ctime_nsec = cpu_to_le32(v))
#define eufs_iwrite_ext(i, v) ((i)->i_ext = cpu_to_le64(v))
#define eufs_writemostly_inode(i) ((i))
#define eufs_iread_generation(i) \
(le32_to_cpu(eufs_writemostly_inode(i)->i_generation))
#define eufs_iread_nlink(i) (le16_to_cpu(eufs_writemostly_inode(i)->i_nlink))
#define eufs_iread_mtime(i) (le64_to_cpu(eufs_writemostly_inode(i)->i_mtime))
#define eufs_iread_atime(i) (le64_to_cpu(eufs_writemostly_inode(i)->i_atime))
#define eufs_iread_root(i) (le64_to_cpu(eufs_writemostly_inode(i)->i_root))
#define eufs_iread_dict(i) (le64_to_cpu(eufs_writemostly_inode(i)->i_dict))
#define eufs_iread_rdev(i) (le32_to_cpu(eufs_writemostly_inode(i)->i_rdev))
#define eufs_iread_size(i) (le64_to_cpu(eufs_writemostly_inode(i)->i_size))
#define eufs_iread_tree_blocks(i) \
(le64_to_cpu(eufs_writemostly_inode(i)->i_tree_blocks))
#define eufs_iwrite_generation(i, v) \
(eufs_writemostly_inode(i)->i_generation = cpu_to_le32(v))
#define eufs_iwrite_nlink(i, v) \
(eufs_writemostly_inode(i)->i_nlink = cpu_to_le16(v))
#define eufs_iwrite_mtime(i, v) \
(eufs_writemostly_inode(i)->i_mtime = cpu_to_le64(v))
#define eufs_iwrite_atime(i, v) \
(eufs_writemostly_inode(i)->i_atime = cpu_to_le64(v))
#define eufs_iwrite_root(i, v) \
(eufs_writemostly_inode(i)->i_root = cpu_to_le64(v))
#define eufs_iwrite_dict(i, v) \
(eufs_writemostly_inode(i)->i_dict = cpu_to_le64(v))
#define eufs_iwrite_rdev(i, v) \
(eufs_writemostly_inode(i)->i_rdev = cpu_to_le32(v))
#define eufs_iwrite_size(i, v) \
(eufs_writemostly_inode(i)->i_size = cpu_to_le64(v))
#define eufs_iwrite_tree_blocks(i, v) \
(eufs_writemostly_inode(i)->i_tree_blocks = cpu_to_le64(v))
#define eufs_iread_mtime_nsec(i) \
(le32_to_cpu(eufs_writemostly_inode(i)->i_mtime_nsec))
#define eufs_iread_atime_nsec(i) \
(le32_to_cpu(eufs_writemostly_inode(i)->i_atime_nsec))
#define eufs_iwrite_mtime_nsec(i, v) \
(eufs_writemostly_inode(i)->i_mtime_nsec = cpu_to_le32(v))
#define eufs_iwrite_atime_nsec(i, v) \
(eufs_writemostly_inode(i)->i_atime_nsec = cpu_to_le32(v))
static inline void eufs_iwrite_ctime_mtime(struct eufs_inode *pi,
struct inode *vi)
{
eufs_iwrite_ctime(pi, vi->i_ctime.tv_sec);
eufs_iwrite_ctime_nsec(pi, vi->i_ctime.tv_nsec);
eufs_iwrite_mtime(pi, vi->i_mtime.tv_sec);
eufs_iwrite_mtime_nsec(pi, vi->i_mtime.tv_nsec);
}
struct eufs_renamej {
__le32 crc;
__le32 flags;
__le64 addr_of_oldnext;
__le64 oldnext;
__le64 addr_of_newde;
__le64 composed_newde; /* composed as list header */
__le64 newde_inode;
__le64 old_dir_pi;
__le64 new_dir_pi;
__le64 time;
__le32 time_nsec;
__le16 old_link;
__le16 new_link;
__le32 old_size;
__le32 new_size;
__u8 pad[40];
} __aligned(CACHELINE_SIZE);
typedef u8 page_info_t;
typedef u8 line_info_t;
struct embedded_line_info {
line_info_t gens[64];
};
#endif /* EUFS_NVM_STRUCT_H */
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved.
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#ifndef EUFS_PBATCH_H
#define EUFS_PBATCH_H
/**
* To prevent data races, only two cases are allowed:
* 1) nvmalloc -> alloc_batch_persist -> nvfree
* 2) nvmalloc -> nvfree
*/
/**
* eufs_alloc_batch_* API usage:
*
* struct alloc_batch batch;
* [ eufs_alloc_batch_init(&batch, estimated_size); ]
* eufs_alloc_batch_hint(&batch, estimated_size);
* eufs_alloc_batch_add(&batch, the_page_pointer);
* eufs_alloc_batch_add(&batch, the_page_pointer);
* ...
* eufs_alloc_batch_add(&batch, the_page_pointer);
* eufs_alloc_batch_persist_reset(&batch);
*
* eufs_alloc_batch_fini(&batch);
*
*/
/* TODO: consider using list? */
#define EUFS_AB_MAX_SIZE (KMALLOC_MAX_SIZE / 8)
/* log2(cache_line size / page_info_t size) */
#define EUFS_PMAP_CNT_SHIFT_PER_CACHELINE 6
static __always_inline void eufs_alloc_batch_hint(struct alloc_batch *pb,
ssize_t size);
static __always_inline void
eufs_alloc_batch_persist_reset(struct super_block *sb, struct alloc_batch *pb);
static __always_inline void eufs_alloc_batch_init(struct alloc_batch *pb,
ssize_t size)
{
pb->n_used = 0;
pb->batch = NULL;
pb->size = 0;
pb->n_pending = 0;
eufs_alloc_batch_hint(pb, size);
BUG_ON(!pb->batch);
}
/* This gives only hints, no guarantees. */
static __always_inline void eufs_alloc_batch_hint(struct alloc_batch *pb,
ssize_t size)
{
ssize_t realsize;
void **batch;
realsize = round_up(size * sizeof(void *), PAGE_SIZE);
if (realsize > KMALLOC_MAX_SIZE)
realsize = KMALLOC_MAX_SIZE;
size = realsize / sizeof(void *);
if (pb->size >= size)
return;
batch = krealloc(pb->batch, realsize, GFP_KERNEL | __GFP_NOFAIL);
BUG_ON(batch == NULL);
pb->batch = batch;
pb->size = size;
eufs_dbg("! eufs_alloc_batch_hint ; ab=%px size=%ld\n", pb, size);
}
static __always_inline void eufs_alloc_batch_hint_off(struct alloc_batch *pb,
ssize_t off_size)
{
eufs_alloc_batch_hint(pb, pb->size + pb->n_pending + off_size);
}
static __always_inline void eufs_alloc_batch_fini(struct alloc_batch *pb)
{
kfree(pb->batch);
pb->batch = NULL;
pb->size = pb->n_used = 0;
}
/* Add an already allocated address */
static __always_inline void eufs_alloc_batch_add(struct super_block *sb,
struct alloc_batch *pb,
void *page)
{
if (pb->n_used == pb->size) {
/* Enlarge */
if (pb->size == EUFS_AB_MAX_SIZE)
eufs_alloc_batch_persist_reset(sb, pb);
else
eufs_alloc_batch_hint(pb, pb->size * 2);
BUG_ON(pb->n_used >= pb->size);
}
BUG_ON(pb->n_used >= pb->size);
pb->batch[pb->n_used] = page;
pb->n_used++;
}
/*
* With the following four functions, alloc_batch can be used as a pool of
* preallocation.
*/
static __always_inline int
eufs_alloc_batch_pre_allocate_begin(struct super_block *sb,
struct alloc_batch *ab, size_t need_blocks)
{
long r;
BUG_ON(ab->n_pending);
eufs_alloc_batch_hint_off(ab, need_blocks);
ab->n_pending = need_blocks;
r = nvmalloc_pre(sb, ab, need_blocks, PAGE_SIZE);
if (r)
ab->n_pending = 0;
return r;
}
static __always_inline void
eufs_alloc_batch_pre_allocate_end(struct super_block *sb,
struct alloc_batch *ab)
{
WARN((ab->n_pending != 0),
"Some pre-allocated pages are not used in %px!\n", ab);
BUG_ON(!list_empty(&ab->list));
}
/* Allocate from the pre-allocated addresses */
static __always_inline void *eufs_alloc_batch_allocate(struct super_block *sb,
struct alloc_batch *ab,
u8 tag)
{
void *page = NULL;
/* used up */
BUG_ON(ab->n_pending <= 0);
page = nvmalloc_pre_get_from_list(sb, &ab->list, tag);
BUG_ON(!page);
ab->n_pending--;
eufs_alloc_batch_add(sb, ab, page);
return page;
}
static __always_inline void *
eufs_alloc_batch_allocate_file_index(struct super_block *sb,
struct alloc_batch *ab)
{
return eufs_alloc_batch_allocate(sb, ab, EUFS_PAGE_FILE_INDEX);
}
static __always_inline void *
eufs_alloc_batch_allocate_file_data(struct super_block *sb,
struct alloc_batch *ab)
{
return eufs_alloc_batch_allocate(sb, ab, EUFS_PAGE_FILE_DATA);
}
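/*
 * Pre-allocation usage sketch (illustrative, error handling elided;
 * nvmalloc_pre() returns 0 on success):
 *
 *	struct alloc_batch ab;
 *	void *index, *data;
 *
 *	eufs_alloc_batch_init(&ab, 16);
 *	if (!eufs_alloc_batch_pre_allocate_begin(sb, &ab, 2)) {
 *		index = eufs_alloc_batch_allocate_file_index(sb, &ab);
 *		data = eufs_alloc_batch_allocate_file_data(sb, &ab);
 *		eufs_alloc_batch_pre_allocate_end(sb, &ab);
 *	}
 *	eufs_alloc_batch_persist_reset(sb, &ab);
 *	eufs_alloc_batch_fini(&ab);
 */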
static int cmp_func(const void *a, const void *b)
{
const void **_a = (const void **)a;
const void **_b = (const void **)b;
if (*_a > *_b)
return 1;
if (*_a < *_b)
return -1;
return 0;
}
#define _PAGE_NO(ptr) (((u64)ptr - (u64)sbi->data_start) / PAGE_SIZE)
#define _LINE_MAP(addr) ((line_info_t *)((u64)(addr)&PAGE_MASK))
#define _IS_LINE(addr) ((u64)addr % PAGE_SIZE)
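/*
 * Example (illustrative): for addr = data_start + 3 * PAGE_SIZE + 128,
 * _PAGE_NO(addr) is 3, _IS_LINE(addr) is non-zero, and the line index
 * within the page is 128 / CACHELINE_SIZE = 2.
 */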
static __always_inline void _set_bitmap(struct eufs_sb_info *sbi, u64 addr,
bool forced)
{
u64 page_no = _PAGE_NO(addr);
u64 rem = addr % PAGE_SIZE;
line_info_t __pmem *line_map;
/* no one can free this address now, so no race will happen */
struct ptr_list_node *node;
int line_no;
if (rem == 0) {
/* page */
node = sbi->cached_nodes + (page_no);
if (!forced) {
BUG_ON(node->solid);
BUG_ON(sbi->page_map[page_no] != EUFS_PAGE_FREE);
}
WARN(node->tag == 0,
"unexpected page node tag %u (addr 0x%llx)\n", node->tag,
addr);
sbi->page_map[page_no] = node->tag;
node->solid = true;
} else {
/* line */
BUG_ON(rem % CACHELINE_SIZE != 0);
line_map = (void *)(addr - rem);
line_no = rem / CACHELINE_SIZE;
BUG_ON(sbi->page_map[page_no] != EUFS_PAGE_FREE &&
sbi->page_map[page_no] != EUFS_PAGE_LINE_USED);
/*
 *  \        _set        _unset
 *  _set     idempotent
 *  _unset
 */
if (sbi->page_map[page_no] == EUFS_PAGE_FREE) {
/* idempotent */
sbi->page_map[page_no] = EUFS_PAGE_LINE_USED;
node = sbi->cached_nodes + (page_no);
BUG_ON(!node->busy);
node->solid = true;
}
node = &sbi->line_node_ptrs[page_no][line_no];
if (!forced) {
BUG_ON(node->solid);
if (line_map[line_no]) {
eufs_info(
"!line_map[line_no] = %px[%d] = %d\n",
line_map, line_no, line_map[line_no]);
BUG();
}
BUG_ON(line_map[line_no]);
}
WARN(node->tag == 0,
"unexpected line node tag %u (addr 0x%llx)\n", node->tag,
addr);
line_map[line_no] = node->tag;
eufs_dbg("set %px[%d] = %d forced=%d\n", line_map, line_no,
line_map[line_no], forced);
node->solid = true;
BUG_ON(!node->busy);
}
}
static __always_inline void
eufs_alloc_batch_persist_reset(struct super_block *sb, struct alloc_batch *pb)
{
struct eufs_sb_info *sbi = EUFS_SB(sb);
u64 page_no, page_no0;
int i;
if (pb->n_used == 0)
goto reset;
if (pb->size == 0)
goto reset;
BUG_ON(!pb->batch);
sort(pb->batch, pb->n_used, sizeof(void *), cmp_func, NULL);
for (i = 0; i < pb->n_used; ++i) {
if (i > 0 && pb->batch[i] == pb->batch[i - 1]) {
pr_info("!pb->batch[i]=%px [i-1]=%px i=%d\n",
pb->batch[i], pb->batch[i - 1], i);
BUG();
}
_set_bitmap(sbi, (u64)pb->batch[i], false);
}
page_no0 = _PAGE_NO(pb->batch[0]);
if (_IS_LINE(pb->batch[0]))
eufs_flush_cacheline(_LINE_MAP(pb->batch[0]));
eufs_flush_cacheline(&sbi->page_map[page_no0]);
for (i = 1; i < pb->n_used; ++i) {
page_no = _PAGE_NO(pb->batch[i]);
if (page_no == page_no0)
/* same page, must be allocation of two cache lines */
continue;
/* different page */
if (_IS_LINE(pb->batch[i]))
eufs_flush_cacheline(_LINE_MAP(pb->batch[i]));
/* not in a single cache line */
if ((page_no >> EUFS_PMAP_CNT_SHIFT_PER_CACHELINE) !=
(page_no0 >> EUFS_PMAP_CNT_SHIFT_PER_CACHELINE))
eufs_flush_cacheline(&sbi->page_map[page_no]);
page_no0 = page_no;
}
eufs_dbg("!persistallocation: pb=%px sorted %px~%px %ld\n", pb,
pb->batch[0], pb->batch[pb->n_used - 1], pb->n_used);
reset:
pb->n_used = 0;
}
static __always_inline void eufs_alloc_persist(struct super_block *sb,
void *ptr, bool forced)
{
struct eufs_sb_info *sbi = EUFS_SB(sb);
u64 page_no = _PAGE_NO(ptr);
_set_bitmap(sbi, (u64)ptr, forced);
if (_IS_LINE(ptr))
eufs_flush_cacheline(_LINE_MAP(ptr));
eufs_flush_cacheline(&sbi->page_map[page_no]);
}
#undef _PAGE_NO
#undef _LINE_MAP
#undef _IS_LINE
#endif /* EUFS_PBATCH_H */