提交 c2f340a6 编写于 作者: L Linus Torvalds

Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd

* 'for-linus' of git://git.open-osd.org/linux-open-osd:
  ore: Make ore its own module
  exofs: Rename raid engine from exofs/ios.c => ore
  exofs: ios: Move to a per inode components & device-table
  exofs: Move exofs specific osd operations out of ios.c
  exofs: Add offset/length to exofs_get_io_state
  exofs: Fix truncate for the raid-groups case
  exofs: Small cleanup of exofs_fill_super
  exofs: BUG: Avoid sbi realloc
  exofs: Remove pnfs-osd private definitions
  nfs_xdr: Move nfs4_string definition out of #ifdef CONFIG_NFS_V4
...@@ -12,5 +12,8 @@ ...@@ -12,5 +12,8 @@
# Kbuild - Gets included from the Kernels Makefile and build system # Kbuild - Gets included from the Kernels Makefile and build system
# #
exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o # ore module library
obj-$(CONFIG_ORE) += ore.o
exofs-y := inode.o file.o symlink.o namei.o dir.o super.o
obj-$(CONFIG_EXOFS_FS) += exofs.o obj-$(CONFIG_EXOFS_FS) += exofs.o
config ORE
tristate
config EXOFS_FS config EXOFS_FS
tristate "exofs: OSD based file system support" tristate "exofs: OSD based file system support"
depends on SCSI_OSD_ULD depends on SCSI_OSD_ULD
select ORE
help help
EXOFS is a file system that uses an OSD storage device, EXOFS is a file system that uses an OSD storage device,
as its backing storage. as its backing storage.
......
...@@ -36,12 +36,9 @@ ...@@ -36,12 +36,9 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include "common.h" #include <scsi/osd_ore.h>
/* FIXME: Remove once pnfs hits mainline #include "common.h"
* #include <linux/exportfs/pnfs_osd_xdr.h>
*/
#include "pnfs.h"
#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a) #define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
...@@ -56,27 +53,11 @@ ...@@ -56,27 +53,11 @@
/* u64 has problems with printk this will cast it to unsigned long long */ /* u64 has problems with printk this will cast it to unsigned long long */
#define _LLU(x) (unsigned long long)(x) #define _LLU(x) (unsigned long long)(x)
/*
 * In-memory description of how exofs spreads data over its OSD devices:
 * the partition id, the striping/grouping parameters and a trailing
 * variable-length array of device pointers.
 * NOTE(review): because s_ods[] is a zero-length trailing array, any struct
 * embedding this one has to keep it as the last real member - confirm at the
 * embedding site.
 */
struct exofs_layout {
osd_id s_pid; /* partition ID of file system*/
/* Our way of looking at the data_map */
unsigned stripe_unit;
/* presumably "number of mirrors plus one" - TODO confirm against ios.c */
unsigned mirrors_p1;
unsigned group_width;
u64 group_depth;
unsigned group_count;
enum exofs_inode_layout_gen_functions lay_func;
unsigned s_numdevs; /* Num of devices in array */
struct osd_dev *s_ods[0]; /* Variable length */
};
/* /*
* our extension to the in-memory superblock * our extension to the in-memory superblock
*/ */
struct exofs_sb_info { struct exofs_sb_info {
struct backing_dev_info bdi; /* register our bdi with VFS */
struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/ struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
int s_timeout; /* timeout for OSD operations */ int s_timeout; /* timeout for OSD operations */
uint64_t s_nextid; /* highest object ID used */ uint64_t s_nextid; /* highest object ID used */
...@@ -84,16 +65,13 @@ struct exofs_sb_info { ...@@ -84,16 +65,13 @@ struct exofs_sb_info {
spinlock_t s_next_gen_lock; /* spinlock for gen # update */ spinlock_t s_next_gen_lock; /* spinlock for gen # update */
u32 s_next_generation; /* next gen # to use */ u32 s_next_generation; /* next gen # to use */
atomic_t s_curr_pending; /* number of pending commands */ atomic_t s_curr_pending; /* number of pending commands */
uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
struct backing_dev_info bdi; /* register our bdi with VFS */
struct pnfs_osd_data_map data_map; /* Default raid to use struct pnfs_osd_data_map data_map; /* Default raid to use
* FIXME: Needed ? * FIXME: Needed ?
*/ */
/* struct exofs_layout dir_layout;*/ /* Default dir layout */ struct ore_layout layout; /* Default files layout */
struct exofs_layout layout; /* Default files layout, struct ore_comp one_comp; /* id & cred of partition id=0*/
* contains the variable osd_dev struct ore_components comps; /* comps for the partition */
* array. Keep last */
struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
}; };
...@@ -107,7 +85,8 @@ struct exofs_i_info { ...@@ -107,7 +85,8 @@ struct exofs_i_info {
uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/ uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/
uint32_t i_dir_start_lookup; /* which page to start lookup */ uint32_t i_dir_start_lookup; /* which page to start lookup */
uint64_t i_commit_size; /* the object's written length */ uint64_t i_commit_size; /* the object's written length */
uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */ struct ore_comp one_comp; /* same component for all devices */
struct ore_components comps; /* inode view of the device table */
}; };
static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
...@@ -115,52 +94,6 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi) ...@@ -115,52 +94,6 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF; return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
} }
struct exofs_io_state;
/*
 * Callback type stored in exofs_io_state.done. It is handed the io state and
 * a void pointer.
 * NOTE(review): presumably called on completion with the state's own
 * ->private value - confirm in ios.c.
 */
typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
/*
 * State of one exofs I/O request: target object and credential, the
 * read/write payload description, optional attribute lists, and one
 * per_dev[] slot per device.
 */
struct exofs_io_state {
/* NOTE(review): presumably the refcount behind exofs_put_io_state() */
struct kref kref;
/* callback and its opaque argument, both set by the submitter */
void *private;
exofs_io_done_fn done;
struct exofs_layout *layout;
/* object and credential; exofs_oi_write()/exofs_oi_read() fill these in */
struct osd_obj_id obj;
u8 *cred;
/* Global read/write IO*/
loff_t offset;
unsigned long length;
void *kern_buff;
struct page **pages;
unsigned nr_pages;
unsigned pgbase;
unsigned pages_consumed;
/* Attributes */
unsigned in_attr_len;
struct osd_attr *in_attr;
unsigned out_attr_len;
struct osd_attr *out_attr;
/* Variable array of size numdevs */
unsigned numdevs;
/*
 * Flexible array member: exofs_io_state_size() computes the allocation
 * size for a given number of entries.
 */
struct exofs_per_dev_state {
struct osd_request *or;
struct bio *bio;
loff_t offset;
unsigned length;
unsigned dev;
} per_dev[];
};
/*
 * Number of bytes occupied by a struct exofs_io_state whose trailing
 * per_dev[] array holds @numdevs entries.
 */
static inline unsigned exofs_io_state_size(unsigned numdevs)
{
	unsigned total = sizeof(struct exofs_io_state);

	total += sizeof(struct exofs_per_dev_state) * numdevs;
	return total;
}
/* /*
* our inode flags * our inode flags
*/ */
...@@ -204,12 +137,6 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode) ...@@ -204,12 +137,6 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
return container_of(inode, struct exofs_i_info, vfs_inode); return container_of(inode, struct exofs_i_info, vfs_inode);
} }
/*
* Given a layout, object_number and stripe_index return the associated global
* dev_index
*/
unsigned exofs_layout_od_id(struct exofs_layout *layout,
osd_id obj_no, unsigned layout_index);
/* /*
* Maximum count of links to a file * Maximum count of links to a file
*/ */
...@@ -219,44 +146,8 @@ unsigned exofs_layout_od_id(struct exofs_layout *layout, ...@@ -219,44 +146,8 @@ unsigned exofs_layout_od_id(struct exofs_layout *layout,
* function declarations * * function declarations *
*************************/ *************************/
/* ios.c */
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
const struct osd_obj_id *obj);
int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
u64 offset, void *p, unsigned length);
int exofs_get_io_state(struct exofs_layout *layout,
struct exofs_io_state **ios);
void exofs_put_io_state(struct exofs_io_state *ios);
int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
int exofs_sbi_create(struct exofs_io_state *ios);
int exofs_sbi_remove(struct exofs_io_state *ios);
int exofs_sbi_write(struct exofs_io_state *ios);
int exofs_sbi_read(struct exofs_io_state *ios);
int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
static inline int exofs_oi_write(struct exofs_i_info *oi,
struct exofs_io_state *ios)
{
ios->obj.id = exofs_oi_objno(oi);
ios->cred = oi->i_cred;
return exofs_sbi_write(ios);
}
static inline int exofs_oi_read(struct exofs_i_info *oi,
struct exofs_io_state *ios)
{
ios->obj.id = exofs_oi_objno(oi);
ios->cred = oi->i_cred;
return exofs_sbi_read(ios);
}
/* inode.c */ /* inode.c */
unsigned exofs_max_io_pages(struct exofs_layout *layout, unsigned exofs_max_io_pages(struct ore_layout *layout,
unsigned expected_pages); unsigned expected_pages);
int exofs_setattr(struct dentry *, struct iattr *); int exofs_setattr(struct dentry *, struct iattr *);
int exofs_write_begin(struct file *file, struct address_space *mapping, int exofs_write_begin(struct file *file, struct address_space *mapping,
...@@ -281,6 +172,8 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *, ...@@ -281,6 +172,8 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
struct inode *); struct inode *);
/* super.c */ /* super.c */
void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
const struct osd_obj_id *obj);
int exofs_sbi_write_stats(struct exofs_sb_info *sbi); int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
/********************* /*********************
...@@ -295,7 +188,6 @@ extern const struct file_operations exofs_file_operations; ...@@ -295,7 +188,6 @@ extern const struct file_operations exofs_file_operations;
/* inode.c */ /* inode.c */
extern const struct address_space_operations exofs_aops; extern const struct address_space_operations exofs_aops;
extern const struct osd_attr g_attr_logical_length;
/* namei.c */ /* namei.c */
extern const struct inode_operations exofs_dir_inode_operations; extern const struct inode_operations exofs_dir_inode_operations;
...@@ -305,4 +197,33 @@ extern const struct inode_operations exofs_special_inode_operations; ...@@ -305,4 +197,33 @@ extern const struct inode_operations exofs_special_inode_operations;
extern const struct inode_operations exofs_symlink_inode_operations; extern const struct inode_operations exofs_symlink_inode_operations;
extern const struct inode_operations exofs_fast_symlink_inode_operations; extern const struct inode_operations exofs_fast_symlink_inode_operations;
/*
 * exofs_init_comps - set up a per-object view of the device table.
 *
 * @one_comp is filled with the partition taken from @sbi, the object id @oid
 * and a credential built for that object. @comps is then pointed at that
 * single component (EC_SINGLE_COMP) and given the same device count as
 * @sbi->comps.
 *
 * The device pointer array is a round-robin window into @sbi->comps.ods. It
 * starts at index ((unsigned)oid * mirrors_p1) % numdevs and is still
 * numdevs entries long, so it may run past the end of a single copy of the
 * table. It is therefore assumed that the global device array at @sbi is
 * twice as large and that the device table is repeated twice in it.
 * See: exofs_read_lookup_dev_table()
 */
static inline void exofs_init_comps(struct ore_components *comps,
				    struct ore_comp *one_comp,
				    struct exofs_sb_info *sbi, osd_id oid)
{
	unsigned window_start;

	/* Identity and credential of the object */
	one_comp->obj.partition = sbi->one_comp.obj.partition;
	one_comp->obj.id = oid;
	exofs_make_credential(one_comp->cred, &one_comp->obj);

	/* A single component entry, with the superblock's device count */
	comps->numdevs = sbi->comps.numdevs;
	comps->single_comp = EC_SINGLE_COMP;
	comps->comps = one_comp;

	/* Round robin device view of the table */
	window_start = ((unsigned)oid * sbi->layout.mirrors_p1) %
		       sbi->comps.numdevs;
	comps->ods = &sbi->comps.ods[window_start];
}
#endif #endif
...@@ -43,7 +43,7 @@ enum { BIO_MAX_PAGES_KMALLOC = ...@@ -43,7 +43,7 @@ enum { BIO_MAX_PAGES_KMALLOC =
PAGE_SIZE / sizeof(struct page *), PAGE_SIZE / sizeof(struct page *),
}; };
unsigned exofs_max_io_pages(struct exofs_layout *layout, unsigned exofs_max_io_pages(struct ore_layout *layout,
unsigned expected_pages) unsigned expected_pages)
{ {
unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC); unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
...@@ -58,7 +58,7 @@ struct page_collect { ...@@ -58,7 +58,7 @@ struct page_collect {
struct exofs_sb_info *sbi; struct exofs_sb_info *sbi;
struct inode *inode; struct inode *inode;
unsigned expected_pages; unsigned expected_pages;
struct exofs_io_state *ios; struct ore_io_state *ios;
struct page **pages; struct page **pages;
unsigned alloc_pages; unsigned alloc_pages;
...@@ -110,13 +110,6 @@ static int pcol_try_alloc(struct page_collect *pcol) ...@@ -110,13 +110,6 @@ static int pcol_try_alloc(struct page_collect *pcol)
{ {
unsigned pages; unsigned pages;
if (!pcol->ios) { /* First time allocate io_state */
int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
if (ret)
return ret;
}
/* TODO: easily support bio chaining */ /* TODO: easily support bio chaining */
pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages); pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
...@@ -140,7 +133,7 @@ static void pcol_free(struct page_collect *pcol) ...@@ -140,7 +133,7 @@ static void pcol_free(struct page_collect *pcol)
pcol->pages = NULL; pcol->pages = NULL;
if (pcol->ios) { if (pcol->ios) {
exofs_put_io_state(pcol->ios); ore_put_io_state(pcol->ios);
pcol->ios = NULL; pcol->ios = NULL;
} }
} }
...@@ -200,7 +193,7 @@ static int __readpages_done(struct page_collect *pcol) ...@@ -200,7 +193,7 @@ static int __readpages_done(struct page_collect *pcol)
u64 resid; u64 resid;
u64 good_bytes; u64 good_bytes;
u64 length = 0; u64 length = 0;
int ret = exofs_check_io(pcol->ios, &resid); int ret = ore_check_io(pcol->ios, &resid);
if (likely(!ret)) if (likely(!ret))
good_bytes = pcol->length; good_bytes = pcol->length;
...@@ -241,7 +234,7 @@ static int __readpages_done(struct page_collect *pcol) ...@@ -241,7 +234,7 @@ static int __readpages_done(struct page_collect *pcol)
} }
/* callback of async reads */ /* callback of async reads */
static void readpages_done(struct exofs_io_state *ios, void *p) static void readpages_done(struct ore_io_state *ios, void *p)
{ {
struct page_collect *pcol = p; struct page_collect *pcol = p;
...@@ -269,20 +262,28 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) ...@@ -269,20 +262,28 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
static int read_exec(struct page_collect *pcol) static int read_exec(struct page_collect *pcol)
{ {
struct exofs_i_info *oi = exofs_i(pcol->inode); struct exofs_i_info *oi = exofs_i(pcol->inode);
struct exofs_io_state *ios = pcol->ios; struct ore_io_state *ios;
struct page_collect *pcol_copy = NULL; struct page_collect *pcol_copy = NULL;
int ret; int ret;
if (!pcol->pages) if (!pcol->pages)
return 0; return 0;
if (!pcol->ios) {
int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, true,
pcol->pg_first << PAGE_CACHE_SHIFT,
pcol->length, &pcol->ios);
if (ret)
return ret;
}
ios = pcol->ios;
ios->pages = pcol->pages; ios->pages = pcol->pages;
ios->nr_pages = pcol->nr_pages; ios->nr_pages = pcol->nr_pages;
ios->length = pcol->length;
ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
if (pcol->read_4_write) { if (pcol->read_4_write) {
exofs_oi_read(oi, pcol->ios); ore_read(pcol->ios);
return __readpages_done(pcol); return __readpages_done(pcol);
} }
...@@ -295,14 +296,14 @@ static int read_exec(struct page_collect *pcol) ...@@ -295,14 +296,14 @@ static int read_exec(struct page_collect *pcol)
*pcol_copy = *pcol; *pcol_copy = *pcol;
ios->done = readpages_done; ios->done = readpages_done;
ios->private = pcol_copy; ios->private = pcol_copy;
ret = exofs_oi_read(oi, ios); ret = ore_read(ios);
if (unlikely(ret)) if (unlikely(ret))
goto err; goto err;
atomic_inc(&pcol->sbi->s_curr_pending); atomic_inc(&pcol->sbi->s_curr_pending);
EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n", EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
ios->obj.id, _LLU(ios->offset), pcol->length); oi->one_comp.obj.id, _LLU(ios->offset), pcol->length);
/* pages ownership was passed to pcol_copy */ /* pages ownership was passed to pcol_copy */
_pcol_reset(pcol); _pcol_reset(pcol);
...@@ -457,14 +458,14 @@ static int exofs_readpage(struct file *file, struct page *page) ...@@ -457,14 +458,14 @@ static int exofs_readpage(struct file *file, struct page *page)
} }
/* Callback for osd_write. All writes are asynchronous */ /* Callback for osd_write. All writes are asynchronous */
static void writepages_done(struct exofs_io_state *ios, void *p) static void writepages_done(struct ore_io_state *ios, void *p)
{ {
struct page_collect *pcol = p; struct page_collect *pcol = p;
int i; int i;
u64 resid; u64 resid;
u64 good_bytes; u64 good_bytes;
u64 length = 0; u64 length = 0;
int ret = exofs_check_io(ios, &resid); int ret = ore_check_io(ios, &resid);
atomic_dec(&pcol->sbi->s_curr_pending); atomic_dec(&pcol->sbi->s_curr_pending);
...@@ -507,13 +508,21 @@ static void writepages_done(struct exofs_io_state *ios, void *p) ...@@ -507,13 +508,21 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
static int write_exec(struct page_collect *pcol) static int write_exec(struct page_collect *pcol)
{ {
struct exofs_i_info *oi = exofs_i(pcol->inode); struct exofs_i_info *oi = exofs_i(pcol->inode);
struct exofs_io_state *ios = pcol->ios; struct ore_io_state *ios;
struct page_collect *pcol_copy = NULL; struct page_collect *pcol_copy = NULL;
int ret; int ret;
if (!pcol->pages) if (!pcol->pages)
return 0; return 0;
BUG_ON(pcol->ios);
ret = ore_get_rw_state(&pcol->sbi->layout, &oi->comps, false,
pcol->pg_first << PAGE_CACHE_SHIFT,
pcol->length, &pcol->ios);
if (unlikely(ret))
goto err;
pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
if (!pcol_copy) { if (!pcol_copy) {
EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n"); EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
...@@ -523,16 +532,15 @@ static int write_exec(struct page_collect *pcol) ...@@ -523,16 +532,15 @@ static int write_exec(struct page_collect *pcol)
*pcol_copy = *pcol; *pcol_copy = *pcol;
ios = pcol->ios;
ios->pages = pcol_copy->pages; ios->pages = pcol_copy->pages;
ios->nr_pages = pcol_copy->nr_pages; ios->nr_pages = pcol_copy->nr_pages;
ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
ios->length = pcol_copy->length;
ios->done = writepages_done; ios->done = writepages_done;
ios->private = pcol_copy; ios->private = pcol_copy;
ret = exofs_oi_write(oi, ios); ret = ore_write(ios);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("write_exec: exofs_oi_write() Failed\n"); EXOFS_ERR("write_exec: ore_write() Failed\n");
goto err; goto err;
} }
...@@ -844,17 +852,15 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode) ...@@ -844,17 +852,15 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
} }
const struct osd_attr g_attr_logical_length = ATTR_DEF(
OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
static int _do_truncate(struct inode *inode, loff_t newsize) static int _do_truncate(struct inode *inode, loff_t newsize)
{ {
struct exofs_i_info *oi = exofs_i(inode); struct exofs_i_info *oi = exofs_i(inode);
struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
int ret; int ret;
inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mtime = inode->i_ctime = CURRENT_TIME;
ret = exofs_oi_truncate(oi, (u64)newsize); ret = ore_truncate(&sbi->layout, &oi->comps, (u64)newsize);
if (likely(!ret)) if (likely(!ret))
truncate_setsize(inode, newsize); truncate_setsize(inode, newsize);
...@@ -917,30 +923,26 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, ...@@ -917,30 +923,26 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
[1] = g_attr_inode_file_layout, [1] = g_attr_inode_file_layout,
[2] = g_attr_inode_dir_layout, [2] = g_attr_inode_dir_layout,
}; };
struct exofs_io_state *ios; struct ore_io_state *ios;
struct exofs_on_disk_inode_layout *layout; struct exofs_on_disk_inode_layout *layout;
int ret; int ret;
ret = exofs_get_io_state(&sbi->layout, &ios); ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
return ret; return ret;
} }
ios->obj.id = exofs_oi_objno(oi); attrs[1].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs);
exofs_make_credential(oi->i_cred, &ios->obj); attrs[2].len = exofs_on_disk_inode_layout_size(sbi->comps.numdevs);
ios->cred = oi->i_cred;
attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
ios->in_attr = attrs; ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs); ios->in_attr_len = ARRAY_SIZE(attrs);
ret = exofs_sbi_read(ios); ret = ore_read(ios);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n", EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
_LLU(ios->obj.id), ret); _LLU(oi->one_comp.obj.id), ret);
memset(inode, 0, sizeof(*inode)); memset(inode, 0, sizeof(*inode));
inode->i_mode = 0040000 | (0777 & ~022); inode->i_mode = 0040000 | (0777 & ~022);
/* If object is lost on target we might as well enable it's /* If object is lost on target we might as well enable it's
...@@ -990,7 +992,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi, ...@@ -990,7 +992,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
} }
out: out:
exofs_put_io_state(ios); ore_put_io_state(ios);
return ret; return ret;
} }
...@@ -1016,6 +1018,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) ...@@ -1016,6 +1018,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
return inode; return inode;
oi = exofs_i(inode); oi = exofs_i(inode);
__oi_init(oi); __oi_init(oi);
exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info,
exofs_oi_objno(oi));
/* read the inode from the osd */ /* read the inode from the osd */
ret = exofs_get_inode(sb, oi, &fcb); ret = exofs_get_inode(sb, oi, &fcb);
...@@ -1107,21 +1111,22 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi) ...@@ -1107,21 +1111,22 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
* set the obj_created flag so that other methods know that the object exists on * set the obj_created flag so that other methods know that the object exists on
* the OSD. * the OSD.
*/ */
static void create_done(struct exofs_io_state *ios, void *p) static void create_done(struct ore_io_state *ios, void *p)
{ {
struct inode *inode = p; struct inode *inode = p;
struct exofs_i_info *oi = exofs_i(inode); struct exofs_i_info *oi = exofs_i(inode);
struct exofs_sb_info *sbi = inode->i_sb->s_fs_info; struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
int ret; int ret;
ret = exofs_check_io(ios, NULL); ret = ore_check_io(ios, NULL);
exofs_put_io_state(ios); ore_put_io_state(ios);
atomic_dec(&sbi->s_curr_pending); atomic_dec(&sbi->s_curr_pending);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx", EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
_LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid)); _LLU(exofs_oi_objno(oi)),
_LLU(oi->one_comp.obj.partition));
/*TODO: When FS is corrupted creation can fail, object already /*TODO: When FS is corrupted creation can fail, object already
* exist. Get rid of this asynchronous creation, if exist * exist. Get rid of this asynchronous creation, if exist
* increment the obj counter and try the next object. Until we * increment the obj counter and try the next object. Until we
...@@ -1140,14 +1145,13 @@ static void create_done(struct exofs_io_state *ios, void *p) ...@@ -1140,14 +1145,13 @@ static void create_done(struct exofs_io_state *ios, void *p)
*/ */
struct inode *exofs_new_inode(struct inode *dir, int mode) struct inode *exofs_new_inode(struct inode *dir, int mode)
{ {
struct super_block *sb; struct super_block *sb = dir->i_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
struct inode *inode; struct inode *inode;
struct exofs_i_info *oi; struct exofs_i_info *oi;
struct exofs_sb_info *sbi; struct ore_io_state *ios;
struct exofs_io_state *ios;
int ret; int ret;
sb = dir->i_sb;
inode = new_inode(sb); inode = new_inode(sb);
if (!inode) if (!inode)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
...@@ -1157,8 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) ...@@ -1157,8 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
set_obj_2bcreated(oi); set_obj_2bcreated(oi);
sbi = sb->s_fs_info;
inode->i_mapping->backing_dev_info = sb->s_bdi; inode->i_mapping->backing_dev_info = sb->s_bdi;
inode_init_owner(inode, dir, mode); inode_init_owner(inode, dir, mode);
inode->i_ino = sbi->s_nextid++; inode->i_ino = sbi->s_nextid++;
...@@ -1170,25 +1172,24 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) ...@@ -1170,25 +1172,24 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
spin_unlock(&sbi->s_next_gen_lock); spin_unlock(&sbi->s_next_gen_lock);
insert_inode_hash(inode); insert_inode_hash(inode);
exofs_init_comps(&oi->comps, &oi->one_comp, sb->s_fs_info,
exofs_oi_objno(oi));
exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */ exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
mark_inode_dirty(inode); mark_inode_dirty(inode);
ret = exofs_get_io_state(&sbi->layout, &ios); ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n"); EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n");
return ERR_PTR(ret); return ERR_PTR(ret);
} }
ios->obj.id = exofs_oi_objno(oi);
exofs_make_credential(oi->i_cred, &ios->obj);
ios->done = create_done; ios->done = create_done;
ios->private = inode; ios->private = inode;
ios->cred = oi->i_cred;
ret = exofs_sbi_create(ios); ret = ore_create(ios);
if (ret) { if (ret) {
exofs_put_io_state(ios); ore_put_io_state(ios);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
atomic_inc(&sbi->s_curr_pending); atomic_inc(&sbi->s_curr_pending);
...@@ -1207,11 +1208,11 @@ struct updatei_args { ...@@ -1207,11 +1208,11 @@ struct updatei_args {
/* /*
* Callback function from exofs_update_inode(). * Callback function from exofs_update_inode().
*/ */
static void updatei_done(struct exofs_io_state *ios, void *p) static void updatei_done(struct ore_io_state *ios, void *p)
{ {
struct updatei_args *args = p; struct updatei_args *args = p;
exofs_put_io_state(ios); ore_put_io_state(ios);
atomic_dec(&args->sbi->s_curr_pending); atomic_dec(&args->sbi->s_curr_pending);
...@@ -1227,7 +1228,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) ...@@ -1227,7 +1228,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
struct exofs_i_info *oi = exofs_i(inode); struct exofs_i_info *oi = exofs_i(inode);
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct exofs_sb_info *sbi = sb->s_fs_info; struct exofs_sb_info *sbi = sb->s_fs_info;
struct exofs_io_state *ios; struct ore_io_state *ios;
struct osd_attr attr; struct osd_attr attr;
struct exofs_fcb *fcb; struct exofs_fcb *fcb;
struct updatei_args *args; struct updatei_args *args;
...@@ -1266,9 +1267,9 @@ static int exofs_update_inode(struct inode *inode, int do_sync) ...@@ -1266,9 +1267,9 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
} else } else
memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data)); memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
ret = exofs_get_io_state(&sbi->layout, &ios); ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
goto free_args; goto free_args;
} }
...@@ -1285,13 +1286,13 @@ static int exofs_update_inode(struct inode *inode, int do_sync) ...@@ -1285,13 +1286,13 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
ios->private = args; ios->private = args;
} }
ret = exofs_oi_write(oi, ios); ret = ore_write(ios);
if (!do_sync && !ret) { if (!do_sync && !ret) {
atomic_inc(&sbi->s_curr_pending); atomic_inc(&sbi->s_curr_pending);
goto out; /* deallocation in updatei_done */ goto out; /* deallocation in updatei_done */
} }
exofs_put_io_state(ios); ore_put_io_state(ios);
free_args: free_args:
kfree(args); kfree(args);
out: out:
...@@ -1310,11 +1311,11 @@ int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -1310,11 +1311,11 @@ int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
* Callback function from exofs_delete_inode() - don't have much cleaning up to * Callback function from exofs_delete_inode() - don't have much cleaning up to
* do. * do.
*/ */
static void delete_done(struct exofs_io_state *ios, void *p) static void delete_done(struct ore_io_state *ios, void *p)
{ {
struct exofs_sb_info *sbi = p; struct exofs_sb_info *sbi = p;
exofs_put_io_state(ios); ore_put_io_state(ios);
atomic_dec(&sbi->s_curr_pending); atomic_dec(&sbi->s_curr_pending);
} }
...@@ -1329,7 +1330,7 @@ void exofs_evict_inode(struct inode *inode) ...@@ -1329,7 +1330,7 @@ void exofs_evict_inode(struct inode *inode)
struct exofs_i_info *oi = exofs_i(inode); struct exofs_i_info *oi = exofs_i(inode);
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct exofs_sb_info *sbi = sb->s_fs_info; struct exofs_sb_info *sbi = sb->s_fs_info;
struct exofs_io_state *ios; struct ore_io_state *ios;
int ret; int ret;
truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(&inode->i_data, 0);
...@@ -1349,20 +1350,19 @@ void exofs_evict_inode(struct inode *inode) ...@@ -1349,20 +1350,19 @@ void exofs_evict_inode(struct inode *inode)
/* ignore the error, attempt a remove anyway */ /* ignore the error, attempt a remove anyway */
/* Now Remove the OSD objects */ /* Now Remove the OSD objects */
ret = exofs_get_io_state(&sbi->layout, &ios); ret = ore_get_io_state(&sbi->layout, &oi->comps, &ios);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); EXOFS_ERR("%s: ore_get_io_state failed\n", __func__);
return; return;
} }
ios->obj.id = exofs_oi_objno(oi);
ios->done = delete_done; ios->done = delete_done;
ios->private = sbi; ios->private = sbi;
ios->cred = oi->i_cred;
ret = exofs_sbi_remove(ios); ret = ore_remove(ios);
if (ret) { if (ret) {
EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__); EXOFS_ERR("%s: ore_remove failed\n", __func__);
exofs_put_io_state(ios); ore_put_io_state(ios);
return; return;
} }
atomic_inc(&sbi->s_curr_pending); atomic_inc(&sbi->s_curr_pending);
......
此差异已折叠。
/*
* Copyright (C) 2008, 2009
* Boaz Harrosh <bharrosh@panasas.com>
*
* This file is part of exofs.
*
* exofs is free software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
*/
/* FIXME: Remove this file once pnfs hits mainline */
#ifndef __EXOFS_PNFS_H__
#define __EXOFS_PNFS_H__
#if ! defined(__PNFS_OSD_XDR_H__)
/*
 * I/O mode constants. Only compiled when __PNFS_OSD_XDR_H__ is not defined
 * (see the enclosing #if).
 * NOTE(review): presumably a stand-in for the definitions that header
 * provides - confirm before reusing the numeric values.
 */
enum pnfs_iomode {
IOMODE_READ = 1,
IOMODE_RW = 2,
IOMODE_ANY = 3,
};
/* Layout Structure */
/*
 * Raid algorithm identifiers, numbered from 1.
 * NOTE(review): presumably the values stored in
 * pnfs_osd_data_map.odm_raid_algorithm - confirm against the users.
 */
enum pnfs_osd_raid_algorithm4 {
PNFS_OSD_RAID_0 = 1,
PNFS_OSD_RAID_4 = 2,
PNFS_OSD_RAID_5 = 3,
PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */
};
/*
 * Data-map parameters: component count, stripe unit, group width and depth,
 * mirror count and raid algorithm. Note that odm_stripe_unit is the only
 * 64-bit member; all others are 32-bit.
 */
struct pnfs_osd_data_map {
u32 odm_num_comps;
u64 odm_stripe_unit;
u32 odm_group_width;
u32 odm_group_depth;
u32 odm_mirror_cnt;
/* presumably one of enum pnfs_osd_raid_algorithm4 - TODO confirm */
u32 odm_raid_algorithm;
};
#endif /* ! defined(__PNFS_OSD_XDR_H__) */
#endif /* __EXOFS_PNFS_H__ */
...@@ -40,6 +40,8 @@ ...@@ -40,6 +40,8 @@
#include "exofs.h" #include "exofs.h"
#define EXOFS_DBGMSG2(M...) do {} while (0)
/****************************************************************************** /******************************************************************************
* MOUNT OPTIONS * MOUNT OPTIONS
*****************************************************************************/ *****************************************************************************/
...@@ -208,10 +210,48 @@ static void destroy_inodecache(void) ...@@ -208,10 +210,48 @@ static void destroy_inodecache(void)
} }
/****************************************************************************** /******************************************************************************
* SUPERBLOCK FUNCTIONS * Some osd helpers
*****************************************************************************/ *****************************************************************************/
static const struct super_operations exofs_sops; void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
static const struct export_operations exofs_export_ops; {
osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
}
/*
 * exofs_read_kern - read part of an OSD object into a kernel buffer
 * @od:     device the request is started on
 * @cred:   credential handed to osd_finalize_request()
 * @obj:    object to read from
 * @offset: offset within the object
 * @p:      destination kernel buffer
 * @length: number of bytes requested
 *
 * Starts a request, sets it up as a kernel-buffer read, finalizes it with
 * @cred and executes it.  The request is always ended before returning.
 * Sense data is not decoded here.
 *
 * Returns 0 on success, -ENOMEM if no request could be started, otherwise
 * the non-zero value returned by the first step that failed.
 */
static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
			   u64 offset, void *p, unsigned length)
{
	struct osd_request *req;
	int err;

	req = osd_start_request(od, GFP_KERNEL);
	if (unlikely(!req)) {
		EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
		return -ENOMEM;
	}

	/* Each stage runs only if every previous stage succeeded */
	err = osd_req_read_kern(req, obj, offset, p, length);
	if (unlikely(err)) {
		EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
	}

	if (likely(!err)) {
		err = osd_finalize_request(req, 0, cred, NULL);
		if (unlikely(err)) {
			EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", err);
		}

		if (likely(!err)) {
			err = osd_execute_request(req);
			if (unlikely(err)) {
				EXOFS_DBGMSG("osd_execute_request() => %d\n", err);
			}
		}
	}

	/* Common exit: release the request whatever the outcome was */
	osd_end_request(req);

	EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
		      "length=0x%llx dev=%p ret=>%d\n",
		      _LLU(obj->id), _LLU(offset), _LLU(length), od, err);
	return err;
}
static const struct osd_attr g_attr_sb_stats = ATTR_DEF( static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
EXOFS_APAGE_SB_DATA, EXOFS_APAGE_SB_DATA,
...@@ -223,21 +263,19 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi) ...@@ -223,21 +263,19 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
struct osd_attr attrs[] = { struct osd_attr attrs[] = {
[0] = g_attr_sb_stats, [0] = g_attr_sb_stats,
}; };
struct exofs_io_state *ios; struct ore_io_state *ios;
int ret; int ret;
ret = exofs_get_io_state(&sbi->layout, &ios); ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
return ret; return ret;
} }
ios->cred = sbi->s_cred;
ios->in_attr = attrs; ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs); ios->in_attr_len = ARRAY_SIZE(attrs);
ret = exofs_sbi_read(ios); ret = ore_read(ios);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("Error reading super_block stats => %d\n", ret); EXOFS_ERR("Error reading super_block stats => %d\n", ret);
goto out; goto out;
...@@ -264,13 +302,13 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi) ...@@ -264,13 +302,13 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
} }
out: out:
exofs_put_io_state(ios); ore_put_io_state(ios);
return ret; return ret;
} }
static void stats_done(struct exofs_io_state *ios, void *p) static void stats_done(struct ore_io_state *ios, void *p)
{ {
exofs_put_io_state(ios); ore_put_io_state(ios);
/* Good thanks nothing to do anymore */ /* Good thanks nothing to do anymore */
} }
...@@ -280,12 +318,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) ...@@ -280,12 +318,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
struct osd_attr attrs[] = { struct osd_attr attrs[] = {
[0] = g_attr_sb_stats, [0] = g_attr_sb_stats,
}; };
struct exofs_io_state *ios; struct ore_io_state *ios;
int ret; int ret;
ret = exofs_get_io_state(&sbi->layout, &ios); ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__); EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
return ret; return ret;
} }
...@@ -293,21 +331,27 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi) ...@@ -293,21 +331,27 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
attrs[0].val_ptr = &sbi->s_ess; attrs[0].val_ptr = &sbi->s_ess;
ios->cred = sbi->s_cred;
ios->done = stats_done; ios->done = stats_done;
ios->private = sbi; ios->private = sbi;
ios->out_attr = attrs; ios->out_attr = attrs;
ios->out_attr_len = ARRAY_SIZE(attrs); ios->out_attr_len = ARRAY_SIZE(attrs);
ret = exofs_sbi_write(ios); ret = ore_write(ios);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); EXOFS_ERR("%s: ore_write failed.\n", __func__);
exofs_put_io_state(ios); ore_put_io_state(ios);
} }
return ret; return ret;
} }
/******************************************************************************
* SUPERBLOCK FUNCTIONS
*****************************************************************************/
static const struct super_operations exofs_sops;
static const struct export_operations exofs_export_ops;
/* /*
* Write the superblock to the OSD * Write the superblock to the OSD
*/ */
...@@ -315,7 +359,9 @@ int exofs_sync_fs(struct super_block *sb, int wait) ...@@ -315,7 +359,9 @@ int exofs_sync_fs(struct super_block *sb, int wait)
{ {
struct exofs_sb_info *sbi; struct exofs_sb_info *sbi;
struct exofs_fscb *fscb; struct exofs_fscb *fscb;
struct exofs_io_state *ios; struct ore_comp one_comp;
struct ore_components comps;
struct ore_io_state *ios;
int ret = -ENOMEM; int ret = -ENOMEM;
fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
...@@ -331,7 +377,10 @@ int exofs_sync_fs(struct super_block *sb, int wait) ...@@ -331,7 +377,10 @@ int exofs_sync_fs(struct super_block *sb, int wait)
* version). Otherwise the exofs_fscb is read-only from mkfs time. All * version). Otherwise the exofs_fscb is read-only from mkfs time. All
* the writeable info is set in exofs_sbi_write_stats() above. * the writeable info is set in exofs_sbi_write_stats() above.
*/ */
ret = exofs_get_io_state(&sbi->layout, &ios);
exofs_init_comps(&comps, &one_comp, sbi, EXOFS_SUPER_ID);
ret = ore_get_io_state(&sbi->layout, &comps, &ios);
if (unlikely(ret)) if (unlikely(ret))
goto out; goto out;
...@@ -345,14 +394,12 @@ int exofs_sync_fs(struct super_block *sb, int wait) ...@@ -345,14 +394,12 @@ int exofs_sync_fs(struct super_block *sb, int wait)
fscb->s_newfs = 0; fscb->s_newfs = 0;
fscb->s_version = EXOFS_FSCB_VER; fscb->s_version = EXOFS_FSCB_VER;
ios->obj.id = EXOFS_SUPER_ID;
ios->offset = 0; ios->offset = 0;
ios->kern_buff = fscb; ios->kern_buff = fscb;
ios->cred = sbi->s_cred;
ret = exofs_sbi_write(ios); ret = ore_write(ios);
if (unlikely(ret)) if (unlikely(ret))
EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); EXOFS_ERR("%s: ore_write failed.\n", __func__);
else else
sb->s_dirt = 0; sb->s_dirt = 0;
...@@ -360,7 +407,7 @@ int exofs_sync_fs(struct super_block *sb, int wait) ...@@ -360,7 +407,7 @@ int exofs_sync_fs(struct super_block *sb, int wait)
unlock_super(sb); unlock_super(sb);
out: out:
EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
exofs_put_io_state(ios); ore_put_io_state(ios);
kfree(fscb); kfree(fscb);
return ret; return ret;
} }
...@@ -384,15 +431,17 @@ static void _exofs_print_device(const char *msg, const char *dev_path, ...@@ -384,15 +431,17 @@ static void _exofs_print_device(const char *msg, const char *dev_path,
void exofs_free_sbi(struct exofs_sb_info *sbi) void exofs_free_sbi(struct exofs_sb_info *sbi)
{ {
while (sbi->layout.s_numdevs) { while (sbi->comps.numdevs) {
int i = --sbi->layout.s_numdevs; int i = --sbi->comps.numdevs;
struct osd_dev *od = sbi->layout.s_ods[i]; struct osd_dev *od = sbi->comps.ods[i];
if (od) { if (od) {
sbi->layout.s_ods[i] = NULL; sbi->comps.ods[i] = NULL;
osduld_put_device(od); osduld_put_device(od);
} }
} }
if (sbi->comps.ods != sbi->_min_one_dev)
kfree(sbi->comps.ods);
kfree(sbi); kfree(sbi);
} }
...@@ -419,8 +468,8 @@ static void exofs_put_super(struct super_block *sb) ...@@ -419,8 +468,8 @@ static void exofs_put_super(struct super_block *sb)
msecs_to_jiffies(100)); msecs_to_jiffies(100));
} }
_exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], _exofs_print_device("Unmounting", NULL, sbi->comps.ods[0],
sbi->layout.s_pid); sbi->one_comp.obj.partition);
bdi_destroy(&sbi->bdi); bdi_destroy(&sbi->bdi);
exofs_free_sbi(sbi); exofs_free_sbi(sbi);
...@@ -501,10 +550,19 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs, ...@@ -501,10 +550,19 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
return -EINVAL; return -EINVAL;
} }
EXOFS_DBGMSG("exofs: layout: "
"num_comps=%u stripe_unit=0x%x group_width=%u "
"group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n",
numdevs,
sbi->layout.stripe_unit,
sbi->layout.group_width,
_LLU(sbi->layout.group_depth),
sbi->layout.mirrors_p1,
sbi->data_map.odm_raid_algorithm);
return 0; return 0;
} }
static unsigned __ra_pages(struct exofs_layout *layout) static unsigned __ra_pages(struct ore_layout *layout)
{ {
const unsigned _MIN_RA = 32; /* min 128K read-ahead */ const unsigned _MIN_RA = 32; /* min 128K read-ahead */
unsigned ra_pages = layout->group_width * layout->stripe_unit / unsigned ra_pages = layout->group_width * layout->stripe_unit /
...@@ -547,13 +605,11 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, ...@@ -547,13 +605,11 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
return !(odi->systemid_len || odi->osdname_len); return !(odi->systemid_len || odi->osdname_len);
} }
static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
struct osd_dev *fscb_od,
unsigned table_count) unsigned table_count)
{ {
struct exofs_sb_info *sbi = *psbi; struct ore_comp comp;
struct osd_dev *fscb_od;
struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
.id = EXOFS_DEVTABLE_ID};
struct exofs_device_table *dt; struct exofs_device_table *dt;
unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) + unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
sizeof(*dt); sizeof(*dt);
...@@ -567,10 +623,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, ...@@ -567,10 +623,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
return -ENOMEM; return -ENOMEM;
} }
fscb_od = sbi->layout.s_ods[0]; sbi->comps.numdevs = 0;
sbi->layout.s_ods[0] = NULL;
sbi->layout.s_numdevs = 0; comp.obj.partition = sbi->one_comp.obj.partition;
ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); comp.obj.id = EXOFS_DEVTABLE_ID;
exofs_make_credential(comp.cred, &comp.obj);
ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt,
table_bytes);
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("ERROR: reading device table\n"); EXOFS_ERR("ERROR: reading device table\n");
goto out; goto out;
...@@ -588,16 +648,18 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, ...@@ -588,16 +648,18 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
goto out; goto out;
if (likely(numdevs > 1)) { if (likely(numdevs > 1)) {
unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]); unsigned size = numdevs * sizeof(sbi->comps.ods[0]);
sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); /* Twice bigger table: See exofs_init_comps() and below
if (unlikely(!sbi)) { * comment
*/
sbi->comps.ods = kzalloc(size + size - 1, GFP_KERNEL);
if (unlikely(!sbi->comps.ods)) {
EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
numdevs);
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
memset(&sbi->layout.s_ods[1], 0,
size - sizeof(sbi->layout.s_ods[0]));
*psbi = sbi;
} }
for (i = 0; i < numdevs; i++) { for (i = 0; i < numdevs; i++) {
...@@ -619,8 +681,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, ...@@ -619,8 +681,8 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
* line. We always keep them in device-table order. * line. We always keep them in device-table order.
*/ */
if (fscb_od && osduld_device_same(fscb_od, &odi)) { if (fscb_od && osduld_device_same(fscb_od, &odi)) {
sbi->layout.s_ods[i] = fscb_od; sbi->comps.ods[i] = fscb_od;
++sbi->layout.s_numdevs; ++sbi->comps.numdevs;
fscb_od = NULL; fscb_od = NULL;
continue; continue;
} }
...@@ -633,13 +695,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, ...@@ -633,13 +695,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
goto out; goto out;
} }
sbi->layout.s_ods[i] = od; sbi->comps.ods[i] = od;
++sbi->layout.s_numdevs; ++sbi->comps.numdevs;
/* Read the fscb of the other devices to make sure the FS /* Read the fscb of the other devices to make sure the FS
* partition is there. * partition is there.
*/ */
ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb,
sizeof(fscb)); sizeof(fscb));
if (unlikely(ret)) { if (unlikely(ret)) {
EXOFS_ERR("ERROR: Malformed participating device " EXOFS_ERR("ERROR: Malformed participating device "
...@@ -656,13 +718,22 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, ...@@ -656,13 +718,22 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
out: out:
kfree(dt); kfree(dt);
if (unlikely(!ret && fscb_od)) { if (likely(!ret)) {
EXOFS_ERR( unsigned numdevs = sbi->comps.numdevs;
"ERROR: Bad device-table container device not present\n");
osduld_put_device(fscb_od);
ret = -EINVAL;
}
if (unlikely(fscb_od)) {
EXOFS_ERR("ERROR: Bad device-table container device not present\n");
osduld_put_device(fscb_od);
return -EINVAL;
}
/* exofs round-robins the device table view according to inode
* number. We hold a table almost twice as big (the first
* numdevs - 1 entries are repeated after the end), hence an inode
* can point to any device and still have a sequential view of the
* whole table starting at that device. See exofs_init_comps()
*/
for (i = 0; i < numdevs - 1; ++i)
sbi->comps.ods[i + numdevs] = sbi->comps.ods[i];
}
return ret; return ret;
} }
...@@ -676,7 +747,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -676,7 +747,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
struct exofs_sb_info *sbi; /*extended info */ struct exofs_sb_info *sbi; /*extended info */
struct osd_dev *od; /* Master device */ struct osd_dev *od; /* Master device */
struct exofs_fscb fscb; /*on-disk superblock info */ struct exofs_fscb fscb; /*on-disk superblock info */
struct osd_obj_id obj; struct ore_comp comp;
unsigned table_count; unsigned table_count;
int ret; int ret;
...@@ -684,10 +755,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -684,10 +755,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi) if (!sbi)
return -ENOMEM; return -ENOMEM;
ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
if (ret)
goto free_bdi;
/* use mount options to fill superblock */ /* use mount options to fill superblock */
if (opts->is_osdname) { if (opts->is_osdname) {
struct osd_dev_info odi = {.systemid_len = 0}; struct osd_dev_info odi = {.systemid_len = 0};
...@@ -695,6 +762,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -695,6 +762,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
odi.osdname_len = strlen(opts->dev_name); odi.osdname_len = strlen(opts->dev_name);
odi.osdname = (u8 *)opts->dev_name; odi.osdname = (u8 *)opts->dev_name;
od = osduld_info_lookup(&odi); od = osduld_info_lookup(&odi);
kfree(opts->dev_name);
opts->dev_name = NULL;
} else { } else {
od = osduld_path_lookup(opts->dev_name); od = osduld_path_lookup(opts->dev_name);
} }
...@@ -709,11 +778,16 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -709,11 +778,16 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sbi->layout.group_width = 1; sbi->layout.group_width = 1;
sbi->layout.group_depth = -1; sbi->layout.group_depth = -1;
sbi->layout.group_count = 1; sbi->layout.group_count = 1;
sbi->layout.s_ods[0] = od;
sbi->layout.s_numdevs = 1;
sbi->layout.s_pid = opts->pid;
sbi->s_timeout = opts->timeout; sbi->s_timeout = opts->timeout;
sbi->one_comp.obj.partition = opts->pid;
sbi->one_comp.obj.id = 0;
exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj);
sbi->comps.numdevs = 1;
sbi->comps.single_comp = EC_SINGLE_COMP;
sbi->comps.comps = &sbi->one_comp;
sbi->comps.ods = sbi->_min_one_dev;
/* fill in some other data by hand */ /* fill in some other data by hand */
memset(sb->s_id, 0, sizeof(sb->s_id)); memset(sb->s_id, 0, sizeof(sb->s_id));
strcpy(sb->s_id, "exofs"); strcpy(sb->s_id, "exofs");
...@@ -724,11 +798,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -724,11 +798,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_bdev = NULL; sb->s_bdev = NULL;
sb->s_dev = 0; sb->s_dev = 0;
obj.partition = sbi->layout.s_pid; comp.obj.partition = sbi->one_comp.obj.partition;
obj.id = EXOFS_SUPER_ID; comp.obj.id = EXOFS_SUPER_ID;
exofs_make_credential(sbi->s_cred, &obj); exofs_make_credential(comp.cred, &comp.obj);
ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb));
if (unlikely(ret)) if (unlikely(ret))
goto free_sbi; goto free_sbi;
...@@ -757,9 +831,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -757,9 +831,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
table_count = le64_to_cpu(fscb.s_dev_table_count); table_count = le64_to_cpu(fscb.s_dev_table_count);
if (table_count) { if (table_count) {
ret = exofs_read_lookup_dev_table(&sbi, table_count); ret = exofs_read_lookup_dev_table(sbi, od, table_count);
if (unlikely(ret)) if (unlikely(ret))
goto free_sbi; goto free_sbi;
} else {
sbi->comps.ods[0] = od;
} }
__sbi_read_stats(sbi); __sbi_read_stats(sbi);
...@@ -793,20 +869,20 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -793,20 +869,20 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
goto free_sbi; goto free_sbi;
} }
_exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
sbi->layout.s_pid); if (ret) {
if (opts->is_osdname) EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
kfree(opts->dev_name); goto free_sbi;
}
_exofs_print_device("Mounting", opts->dev_name, sbi->comps.ods[0],
sbi->one_comp.obj.partition);
return 0; return 0;
free_sbi: free_sbi:
bdi_destroy(&sbi->bdi);
free_bdi:
EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
opts->dev_name, sbi->layout.s_pid, ret); opts->dev_name, sbi->one_comp.obj.partition, ret);
exofs_free_sbi(sbi); exofs_free_sbi(sbi);
if (opts->is_osdname)
kfree(opts->dev_name);
return ret; return ret;
} }
...@@ -837,7 +913,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -837,7 +913,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{ {
struct super_block *sb = dentry->d_sb; struct super_block *sb = dentry->d_sb;
struct exofs_sb_info *sbi = sb->s_fs_info; struct exofs_sb_info *sbi = sb->s_fs_info;
struct exofs_io_state *ios; struct ore_io_state *ios;
struct osd_attr attrs[] = { struct osd_attr attrs[] = {
ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS, ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
...@@ -846,21 +922,18 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -846,21 +922,18 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
}; };
uint64_t capacity = ULLONG_MAX; uint64_t capacity = ULLONG_MAX;
uint64_t used = ULLONG_MAX; uint64_t used = ULLONG_MAX;
uint8_t cred_a[OSD_CAP_LEN];
int ret; int ret;
ret = exofs_get_io_state(&sbi->layout, &ios); ret = ore_get_io_state(&sbi->layout, &sbi->comps, &ios);
if (ret) { if (ret) {
EXOFS_DBGMSG("exofs_get_io_state failed.\n"); EXOFS_DBGMSG("ore_get_io_state failed.\n");
return ret; return ret;
} }
exofs_make_credential(cred_a, &ios->obj);
ios->cred = sbi->s_cred;
ios->in_attr = attrs; ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs); ios->in_attr_len = ARRAY_SIZE(attrs);
ret = exofs_sbi_read(ios); ret = ore_read(ios);
if (unlikely(ret)) if (unlikely(ret))
goto out; goto out;
...@@ -889,7 +962,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf) ...@@ -889,7 +962,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = EXOFS_NAME_LEN; buf->f_namelen = EXOFS_NAME_LEN;
out: out:
exofs_put_io_state(ios); ore_put_io_state(ios);
return ret; return ret;
} }
......
...@@ -773,6 +773,11 @@ struct nfs3_getaclres { ...@@ -773,6 +773,11 @@ struct nfs3_getaclres {
struct posix_acl * acl_default; struct posix_acl * acl_default;
}; };
struct nfs4_string {
unsigned int len;
char *data;
};
#ifdef CONFIG_NFS_V4 #ifdef CONFIG_NFS_V4
typedef u64 clientid4; typedef u64 clientid4;
...@@ -963,11 +968,6 @@ struct nfs4_server_caps_res { ...@@ -963,11 +968,6 @@ struct nfs4_server_caps_res {
struct nfs4_sequence_res seq_res; struct nfs4_sequence_res seq_res;
}; };
struct nfs4_string {
unsigned int len;
char *data;
};
#define NFS4_PATHNAME_MAXCOMPONENTS 512 #define NFS4_PATHNAME_MAXCOMPONENTS 512
struct nfs4_pathname { struct nfs4_pathname {
unsigned int ncomponents; unsigned int ncomponents;
......
/*
* Copyright (C) 2011
* Boaz Harrosh <bharrosh@panasas.com>
*
* Public Declarations of the ORE API
*
* This file is part of the ORE (Object Raid Engine) library.
*
* ORE is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation. (GPL v2)
*
* ORE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with the ORE; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __ORE_H__
#define __ORE_H__
#include <scsi/osd_initiator.h>
#include <scsi/osd_attributes.h>
#include <scsi/osd_sec.h>
#include <linux/pnfs_osd_xdr.h>
/*
 * One component: the id of an OSD object together with the credential
 * bytes (OSD_CAP_LEN of them) used when issuing requests against it.
 */
struct ore_comp {
/* object identity (partition + object id) */
struct osd_obj_id obj;
/* credential for @obj */
u8 cred[OSD_CAP_LEN];
};
/*
 * Striping/mirroring geometry used by the ORE.
 * NOTE(review): field semantics below are read off the names; exact units
 * and rules live in ore.c, which is not visible here - confirm.
 */
struct ore_layout {
/* Our way of looking at the data_map */
unsigned stripe_unit;
/* presumably "mirror count plus one", i.e. total copies of the data */
unsigned mirrors_p1;
unsigned group_width;
/* 64-bit, unlike the other fields of this structure */
u64 group_depth;
unsigned group_count;
};
/*
 * The set of devices an IO is spread over, plus the component (object id and
 * credential) description used on them.
 */
struct ore_components {
unsigned numdevs; /* Num of devices in array */
/* If @single_comp == EC_SINGLE_COMP, @comps points to one single
* component; otherwise @comps holds @numdevs components.
*
* NOTE(review): "EC_MULTPLE_COMPS" is misspelled (MULTIPLE) but is part
* of the public API, so it is left as is. Its value 0xffffffff does not
* fit in a signed int, so the enum relies on compiler support for
* enumerators wider than int.
*/
enum EC_COMP_USAGE {
EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff
} single_comp;
struct ore_comp *comps;
struct osd_dev **ods; /* osd_dev array */
};
/* Forward declaration, needed by the callback type below */
struct ore_io_state;
/*
 * Callback type stored in ore_io_state.done. It receives the io_state and
 * an opaque @private pointer.
 * NOTE(review): presumably @private is ore_io_state.private and the callback
 * runs when the IO completes - confirm in ore.c.
 */
typedef void (*ore_io_done_fn)(struct ore_io_state *ios, void *private);
/*
 * State of one ORE IO operation. The structure ends in a flexible array
 * with one ore_per_dev_state slot per device, so it must be allocated with
 * the size returned by ore_io_state_size().
 */
struct ore_io_state {
/* reference count of this io_state */
struct kref kref;
/* opaque pointer for the owner of the IO; see @done */
void *private;
/* optional callback of type ore_io_done_fn */
ore_io_done_fn done;
/* geometry and devices this IO operates on (not owned by the io_state as
* far as this header shows)
*/
struct ore_layout *layout;
struct ore_components *comps;
/* Global read/write IO*/
loff_t offset;
unsigned long length;
/* source/destination when the IO uses a kernel buffer instead of @pages */
void *kern_buff;
struct page **pages;
unsigned nr_pages;
unsigned pgbase;
unsigned pages_consumed;
/* Attributes */
/* @in_attr/@in_attr_len: array (and its length) of attributes to read */
unsigned in_attr_len;
struct osd_attr *in_attr;
/* @out_attr/@out_attr_len: array (and its length) of attributes to write */
unsigned out_attr_len;
struct osd_attr *out_attr;
bool reading;
/* Variable array of size numdevs */
unsigned numdevs;
/* Per-device slice of the IO: its request, bio, offset and length.
* NOTE(review): @dev is presumably an index into comps->ods - confirm.
*/
struct ore_per_dev_state {
struct osd_request *or;
struct bio *bio;
loff_t offset;
unsigned length;
unsigned dev;
} per_dev[];
};
/*
 * ore_io_state_size - bytes needed for an ore_io_state serving @numdevs devices
 *
 * The result covers the fixed part of the structure plus @numdevs entries of
 * its trailing per_dev[] array.
 */
static inline unsigned ore_io_state_size(unsigned numdevs)
{
	return numdevs * sizeof(struct ore_per_dev_state) +
	       sizeof(struct ore_io_state);
}
/* ore.c */
/*
 * Obtain an io_state for @layout/@comps; it is handed back through @ios.
 * NOTE(review): presumably prepared for a read (@is_reading) or write of
 * @length bytes at @offset - confirm in ore.c.
 */
int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps,
bool is_reading, u64 offset, u64 length,
struct ore_io_state **ios);
/*
 * Same as above without an offset/length. Callers treat any non-zero
 * return as failure and pair a success with ore_put_io_state().
 */
int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps,
struct ore_io_state **ios);
/* Drop the caller's hold on @ios */
void ore_put_io_state(struct ore_io_state *ios);
/* NOTE(review): presumably reports the IO result, with the residual count
 * through @resid - confirm in ore.c.
 */
int ore_check_io(struct ore_io_state *ios, u64 *resid);
/* Operations on a prepared io_state; callers treat non-zero as an error */
int ore_create(struct ore_io_state *ios);
int ore_remove(struct ore_io_state *ios);
int ore_write(struct ore_io_state *ios);
int ore_read(struct ore_io_state *ios);
int ore_truncate(struct ore_layout *layout, struct ore_components *comps,
u64 size);
/* NOTE(review): the two symbols below are exported without the "ore_"
 * prefix used by the rest of this API; renaming would break users.
 */
int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr);
extern const struct osd_attr g_attr_logical_length;
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册