Commit 10cc04f5 authored by Linus Torvalds

Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (138 commits)
  ocfs2: Access the right buffer_head in ocfs2_merge_rec_left.
  ocfs2: use min_t in ocfs2_quota_read()
  ocfs2: remove unneeded lvb casts
  ocfs2: Add xattr support checking in init_security
  ocfs2: alloc xattr bucket in ocfs2_xattr_set_handle
  ocfs2: calculate and reserve credits for xattr value in mknod
  ocfs2/xattr: fix credits calculation during index create
  ocfs2/xattr: Always updating ctime during xattr set.
  ocfs2/xattr: Remove extend_trans call and add its credits from the beginning
  ocfs2/dlm: Fix race during lockres mastery
  ocfs2/dlm: Fix race in adding/removing lockres' to/from the tracking list
  ocfs2/dlm: Hold off sending lockres drop ref message while lockres is migrating
  ocfs2/dlm: Clean up errors in dlm_proxy_ast_handler()
  ocfs2/dlm: Fix a race between migrate request and exit domain
  ocfs2: One more hamming code optimization.
  ocfs2: Another hamming code optimization.
  ocfs2: Don't hand-code xor in ocfs2_hamming_encode().
  ocfs2: Enable metadata checksums.
  ocfs2: Validate superblock with checksum and ecc.
  ocfs2: Checksum and ECC for directory blocks.
  ...
......@@ -31,7 +31,6 @@ Features which OCFS2 does not support yet:
- quotas
- Directory change notification (F_NOTIFY)
- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
- POSIX ACLs
Mount options
=============
......@@ -79,3 +78,5 @@ inode64 Indicates that Ocfs2 is allowed to create inodes at
bits of significance.
user_xattr (*) Enables Extended User Attributes.
nouser_xattr Disables Extended User Attributes.
acl Enables POSIX Access Control Lists support.
noacl (*) Disables POSIX Access Control Lists support.
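The acl/noacl options are ordinary mount options: an administrator passes them with "mount -o", and the same option string reaches the filesystem through the data argument of mount(2). A minimal C sketch follows; the device and mount point are placeholders:

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* The final argument carries the filesystem option string. */
		if (mount("/dev/sdb1", "/mnt/ocfs2", "ocfs2", 0, "acl,user_xattr") == -1)
			perror("mount ocfs2");
		return 0;
	}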
......@@ -189,6 +189,8 @@ config OCFS2_FS
select CONFIGFS_FS
select JBD2
select CRC32
select QUOTA
select QUOTA_TREE
help
OCFS2 is a general purpose extent based shared disk cluster file
system with many similarities to ext3. It supports 64 bit inode
......@@ -258,15 +260,14 @@ config OCFS2_DEBUG_FS
this option for debugging only as it is likely to decrease
performance of the filesystem.
config OCFS2_COMPAT_JBD
bool "Use JBD for compatibility"
config OCFS2_FS_POSIX_ACL
bool "OCFS2 POSIX Access Control Lists"
depends on OCFS2_FS
select FS_POSIX_ACL
default n
select JBD
help
The ocfs2 filesystem now uses JBD2 for its journalling. JBD2
is backwards compatible with JBD. It is safe to say N here.
However, if you really want to use the original JBD, say Y here.
Posix Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
endif # BLOCK
......@@ -303,6 +304,10 @@ config PRINT_QUOTA_WARNING
Note that this behavior is currently deprecated and may go away in
future. Please use notification via netlink socket instead.
# Generic support for tree-structured quota files. Selected when needed.
config QUOTA_TREE
tristate
config QFMT_V1
tristate "Old quota format support"
depends on QUOTA
......@@ -314,6 +319,7 @@ config QFMT_V1
config QFMT_V2
tristate "Quota format v2 support"
depends on QUOTA
select QUOTA_TREE
help
This quota format allows using quotas with 32-bit UIDs/GIDs. If you
need this functionality say Y here.
......
......@@ -54,6 +54,7 @@ obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
obj-$(CONFIG_QUOTA) += dquot.o
obj-$(CONFIG_QFMT_V1) += quota_v1.o
obj-$(CONFIG_QFMT_V2) += quota_v2.o
obj-$(CONFIG_QUOTA_TREE) += quota_tree.o
obj-$(CONFIG_QUOTACTL) += quota.o
obj-$(CONFIG_PROC_FS) += proc/
......
This diff has been collapsed.
......@@ -713,7 +713,9 @@ static struct dquot_operations ext3_quota_operations = {
.acquire_dquot = ext3_acquire_dquot,
.release_dquot = ext3_release_dquot,
.mark_dirty = ext3_mark_dquot_dirty,
.write_info = ext3_write_info
.write_info = ext3_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
};
static struct quotactl_ops ext3_qctl_operations = {
......@@ -1035,8 +1037,7 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
!sbi->s_qf_names[qtype]) {
printk(KERN_ERR
"EXT3-fs: Cannot change journaled "
......@@ -1075,8 +1076,7 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT3-fs: Cannot change "
"journaled quota options when "
......@@ -1095,8 +1095,7 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_jqfmt_vfsv0:
qfmt = QFMT_VFS_V0;
set_qf_format:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
sbi->s_jquota_fmt != qfmt) {
printk(KERN_ERR "EXT3-fs: Cannot change "
"journaled quota options when "
......@@ -1115,8 +1114,7 @@ static int parse_options (char *options, struct super_block *sb,
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
if (sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) {
if (sb_any_quota_loaded(sb)) {
printk(KERN_ERR "EXT3-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;
......
......@@ -803,7 +803,9 @@ static struct dquot_operations ext4_quota_operations = {
.acquire_dquot = ext4_acquire_dquot,
.release_dquot = ext4_release_dquot,
.mark_dirty = ext4_mark_dquot_dirty,
.write_info = ext4_write_info
.write_info = ext4_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
};
static struct quotactl_ops ext4_qctl_operations = {
......@@ -1142,8 +1144,7 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
!sbi->s_qf_names[qtype]) {
printk(KERN_ERR
"EXT4-fs: Cannot change journaled "
......@@ -1182,8 +1183,7 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT4-fs: Cannot change "
"journaled quota options when "
......@@ -1202,8 +1202,7 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_jqfmt_vfsv0:
qfmt = QFMT_VFS_V0;
set_qf_format:
if ((sb_any_quota_enabled(sb) ||
sb_any_quota_suspended(sb)) &&
if (sb_any_quota_loaded(sb) &&
sbi->s_jquota_fmt != qfmt) {
printk(KERN_ERR "EXT4-fs: Cannot change "
"journaled quota options when "
......@@ -1222,7 +1221,7 @@ static int parse_options(char *options, struct super_block *sb,
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
if (sb_any_quota_enabled(sb)) {
if (sb_any_quota_loaded(sb)) {
printk(KERN_ERR "EXT4-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;
......
......@@ -509,6 +509,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (is_journal_aborted(journal)) {
clear_buffer_jbddirty(jh2bh(jh));
JBUFFER_TRACE(jh, "journal is aborting: refile");
jbd2_buffer_abort_trigger(jh,
jh->b_frozen_data ?
jh->b_frozen_triggers :
jh->b_triggers);
jbd2_journal_refile_buffer(journal, jh);
/* If that was the last one, we need to clean up
* any descriptor buffers which may have been
......@@ -844,6 +848,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* data.
*
* Otherwise, we can just throw away the frozen data now.
*
* We also know that the frozen data has already fired
* its triggers if they exist, so we can clear that too.
*/
if (jh->b_committed_data) {
jbd2_free(jh->b_committed_data, bh->b_size);
......@@ -851,10 +858,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (jh->b_frozen_data) {
jh->b_committed_data = jh->b_frozen_data;
jh->b_frozen_data = NULL;
jh->b_frozen_triggers = NULL;
}
} else if (jh->b_frozen_data) {
jbd2_free(jh->b_frozen_data, bh->b_size);
jh->b_frozen_data = NULL;
jh->b_frozen_triggers = NULL;
}
spin_lock(&journal->j_list_lock);
......
......@@ -50,6 +50,7 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates);
EXPORT_SYMBOL(jbd2_journal_get_write_access);
EXPORT_SYMBOL(jbd2_journal_get_create_access);
EXPORT_SYMBOL(jbd2_journal_get_undo_access);
EXPORT_SYMBOL(jbd2_journal_set_triggers);
EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
EXPORT_SYMBOL(jbd2_journal_release_buffer);
EXPORT_SYMBOL(jbd2_journal_forget);
......@@ -290,6 +291,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct page *new_page;
unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in);
struct jbd2_buffer_trigger_type *triggers;
/*
* The buffer really shouldn't be locked: only the current committing
......@@ -314,12 +316,22 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
done_copy_out = 1;
new_page = virt_to_page(jh_in->b_frozen_data);
new_offset = offset_in_page(jh_in->b_frozen_data);
triggers = jh_in->b_frozen_triggers;
} else {
new_page = jh2bh(jh_in)->b_page;
new_offset = offset_in_page(jh2bh(jh_in)->b_data);
triggers = jh_in->b_triggers;
}
mapped_data = kmap_atomic(new_page, KM_USER0);
/*
* Fire any commit trigger. Do this before checking for escaping,
* as the trigger may modify the magic offset. If a copy-out
* happens afterwards, it will have the correct data in the buffer.
*/
jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
triggers);
/*
* Check for escaping
*/
......@@ -352,6 +364,13 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
new_page = virt_to_page(tmp);
new_offset = offset_in_page(tmp);
done_copy_out = 1;
/*
* This isn't strictly necessary, as we're using frozen
* data for the escaping, but it keeps consistency with
* b_frozen_data usage.
*/
jh_in->b_frozen_triggers = jh_in->b_triggers;
}
/*
......
......@@ -741,6 +741,12 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
source = kmap_atomic(page, KM_USER0);
memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
kunmap_atomic(source, KM_USER0);
/*
* Now that the frozen data is saved off, we need to store
* any matching triggers.
*/
jh->b_frozen_triggers = jh->b_triggers;
}
jbd_unlock_bh_state(bh);
......@@ -943,6 +949,47 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
return err;
}
/**
* void jbd2_journal_set_triggers() - Add triggers for commit writeout
* @bh: buffer to trigger on
* @type: struct jbd2_buffer_trigger_type containing the trigger(s).
*
* Set any triggers on this journal_head. This is always safe, because
* triggers for a committing buffer will be saved off, and triggers for
* a running transaction will match the buffer in that transaction.
*
* Call with NULL to clear the triggers.
*/
void jbd2_journal_set_triggers(struct buffer_head *bh,
struct jbd2_buffer_trigger_type *type)
{
struct journal_head *jh = bh2jh(bh);
jh->b_triggers = type;
}
void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
struct jbd2_buffer_trigger_type *triggers)
{
struct buffer_head *bh = jh2bh(jh);
if (!triggers || !triggers->t_commit)
return;
triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
}
void jbd2_buffer_abort_trigger(struct journal_head *jh,
struct jbd2_buffer_trigger_type *triggers)
{
if (!triggers || !triggers->t_abort)
return;
triggers->t_abort(triggers, jh2bh(jh));
}
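As a rough illustration (not part of this change), a client filesystem could attach triggers as sketched below. The callback signatures mirror the calls made by jbd2_buffer_commit_trigger() and jbd2_buffer_abort_trigger() above; my_recompute_check() is a hypothetical helper.

	static void my_commit_trigger(struct jbd2_buffer_trigger_type *type,
				      struct buffer_head *bh, void *mapped_data,
				      size_t size)
	{
		/* mapped_data is the copy of the buffer about to be journaled */
		my_recompute_check(mapped_data, size);
	}

	static struct jbd2_buffer_trigger_type my_triggers = {
		.t_commit	= my_commit_trigger,
		/* .t_abort is optional; leave it NULL if there is nothing to undo */
	};

	/* ...after jbd2_journal_get_write_access(handle, bh) succeeds... */
	jbd2_journal_set_triggers(bh, &my_triggers);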
/**
* int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to.
......
......@@ -12,6 +12,7 @@ obj-$(CONFIG_OCFS2_FS_USERSPACE_CLUSTER) += ocfs2_stack_user.o
ocfs2-objs := \
alloc.o \
aops.o \
blockcheck.o \
buffer_head_io.o \
dcache.o \
dir.o \
......@@ -35,8 +36,14 @@ ocfs2-objs := \
sysfile.o \
uptodate.o \
ver.o \
quota_local.o \
quota_global.o \
xattr.o
ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
ocfs2-objs += acl.o
endif
ocfs2_stackglue-objs := stackglue.o
ocfs2_stack_o2cb-objs := stack_o2cb.o
ocfs2_stack_user-objs := stack_user.o
......
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* acl.c
*
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
*
* CREDITS:
 * Lots of code in this file is copied from linux/fs/ext3/acl.c.
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
#include "ocfs2.h"
#include "alloc.h"
#include "dlmglue.h"
#include "file.h"
#include "ocfs2_fs.h"
#include "xattr.h"
#include "acl.h"
/*
* Convert from xattr value to acl struct.
*/
static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
{
int n, count;
struct posix_acl *acl;
if (!value)
return NULL;
if (size < sizeof(struct posix_acl_entry))
return ERR_PTR(-EINVAL);
count = size / sizeof(struct posix_acl_entry);
if (count < 0)
return ERR_PTR(-EINVAL);
if (count == 0)
return NULL;
acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl)
return ERR_PTR(-ENOMEM);
for (n = 0; n < count; n++) {
struct ocfs2_acl_entry *entry =
(struct ocfs2_acl_entry *)value;
acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
acl->a_entries[n].e_id = le32_to_cpu(entry->e_id);
value += sizeof(struct posix_acl_entry);
}
return acl;
}
/*
* Convert acl struct to xattr value.
*/
static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size)
{
struct ocfs2_acl_entry *entry = NULL;
char *ocfs2_acl;
size_t n;
*size = acl->a_count * sizeof(struct posix_acl_entry);
ocfs2_acl = kmalloc(*size, GFP_NOFS);
if (!ocfs2_acl)
return ERR_PTR(-ENOMEM);
entry = (struct ocfs2_acl_entry *)ocfs2_acl;
for (n = 0; n < acl->a_count; n++, entry++) {
entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
}
return ocfs2_acl;
}
static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
int type,
struct buffer_head *di_bh)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int name_index;
char *value = NULL;
struct posix_acl *acl;
int retval;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return NULL;
switch (type) {
case ACL_TYPE_ACCESS:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
break;
case ACL_TYPE_DEFAULT:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
break;
default:
return ERR_PTR(-EINVAL);
}
retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, "", NULL, 0);
if (retval > 0) {
value = kmalloc(retval, GFP_NOFS);
if (!value)
return ERR_PTR(-ENOMEM);
retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
"", value, retval);
}
if (retval > 0)
acl = ocfs2_acl_from_xattr(value, retval);
else if (retval == -ENODATA || retval == 0)
acl = NULL;
else
acl = ERR_PTR(retval);
kfree(value);
return acl;
}
/*
* Get posix acl.
*/
static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct buffer_head *di_bh = NULL;
struct posix_acl *acl;
int ret;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return NULL;
ret = ocfs2_inode_lock(inode, &di_bh, 0);
if (ret < 0) {
mlog_errno(ret);
acl = ERR_PTR(ret);
return acl;
}
acl = ocfs2_get_acl_nolock(inode, type, di_bh);
ocfs2_inode_unlock(inode, 0);
brelse(di_bh);
return acl;
}
/*
* Set the access or default ACL of an inode.
*/
static int ocfs2_set_acl(handle_t *handle,
struct inode *inode,
struct buffer_head *di_bh,
int type,
struct posix_acl *acl,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac)
{
int name_index;
void *value = NULL;
size_t size = 0;
int ret;
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
switch (type) {
case ACL_TYPE_ACCESS:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
mode_t mode = inode->i_mode;
ret = posix_acl_equiv_mode(acl, &mode);
if (ret < 0)
return ret;
else {
inode->i_mode = mode;
if (ret == 0)
acl = NULL;
}
}
break;
case ACL_TYPE_DEFAULT:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
if (!S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
break;
default:
return -EINVAL;
}
if (acl) {
value = ocfs2_acl_to_xattr(acl, &size);
if (IS_ERR(value))
return (int)PTR_ERR(value);
}
if (handle)
ret = ocfs2_xattr_set_handle(handle, inode, di_bh, name_index,
"", value, size, 0,
meta_ac, data_ac);
else
ret = ocfs2_xattr_set(inode, name_index, "", value, size, 0);
kfree(value);
return ret;
}
int ocfs2_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
int ret = posix_acl_permission(inode, acl, mask);
posix_acl_release(acl);
return ret;
}
return -EAGAIN;
}
int ocfs2_acl_chmod(struct inode *inode)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct posix_acl *acl, *clone;
int ret;
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return 0;
acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl) || !acl)
return PTR_ERR(acl);
clone = posix_acl_clone(acl, GFP_KERNEL);
posix_acl_release(acl);
if (!clone)
return -ENOMEM;
ret = posix_acl_chmod_masq(clone, inode->i_mode);
if (!ret)
ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
clone, NULL, NULL);
posix_acl_release(clone);
return ret;
}
/*
* Initialize the ACLs of a new inode. If parent directory has default ACL,
* then clone to new inode. Called from ocfs2_mknod.
*/
int ocfs2_init_acl(handle_t *handle,
struct inode *inode,
struct inode *dir,
struct buffer_head *di_bh,
struct buffer_head *dir_bh,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct posix_acl *acl = NULL;
int ret = 0;
if (!S_ISLNK(inode->i_mode)) {
if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
dir_bh);
if (IS_ERR(acl))
return PTR_ERR(acl);
}
if (!acl)
inode->i_mode &= ~current->fs->umask;
}
if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
struct posix_acl *clone;
mode_t mode;
if (S_ISDIR(inode->i_mode)) {
ret = ocfs2_set_acl(handle, inode, di_bh,
ACL_TYPE_DEFAULT, acl,
meta_ac, data_ac);
if (ret)
goto cleanup;
}
clone = posix_acl_clone(acl, GFP_NOFS);
ret = -ENOMEM;
if (!clone)
goto cleanup;
mode = inode->i_mode;
ret = posix_acl_create_masq(clone, &mode);
if (ret >= 0) {
inode->i_mode = mode;
if (ret > 0) {
ret = ocfs2_set_acl(handle, inode,
di_bh, ACL_TYPE_ACCESS,
clone, meta_ac, data_ac);
}
}
posix_acl_release(clone);
}
cleanup:
posix_acl_release(acl);
return ret;
}
static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
char *list,
size_t list_len,
const char *name,
size_t name_len)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return 0;
if (list && size <= list_len)
memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
return size;
}
static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
char *list,
size_t list_len,
const char *name,
size_t name_len)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return 0;
if (list && size <= list_len)
memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
return size;
}
static int ocfs2_xattr_get_acl(struct inode *inode,
int type,
void *buffer,
size_t size)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct posix_acl *acl;
int ret;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return -EOPNOTSUPP;
acl = ocfs2_get_acl(inode, type);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl == NULL)
return -ENODATA;
ret = posix_acl_to_xattr(acl, buffer, size);
posix_acl_release(acl);
return ret;
}
static int ocfs2_xattr_get_acl_access(struct inode *inode,
const char *name,
void *buffer,
size_t size)
{
if (strcmp(name, "") != 0)
return -EINVAL;
return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
}
static int ocfs2_xattr_get_acl_default(struct inode *inode,
const char *name,
void *buffer,
size_t size)
{
if (strcmp(name, "") != 0)
return -EINVAL;
return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
}
static int ocfs2_xattr_set_acl(struct inode *inode,
int type,
const void *value,
size_t size)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct posix_acl *acl;
int ret = 0;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return -EOPNOTSUPP;
if (!is_owner_or_cap(inode))
return -EPERM;
if (value) {
acl = posix_acl_from_xattr(value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
else if (acl) {
ret = posix_acl_valid(acl);
if (ret)
goto cleanup;
}
} else
acl = NULL;
ret = ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL);
cleanup:
posix_acl_release(acl);
return ret;
}
static int ocfs2_xattr_set_acl_access(struct inode *inode,
const char *name,
const void *value,
size_t size,
int flags)
{
if (strcmp(name, "") != 0)
return -EINVAL;
return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
}
static int ocfs2_xattr_set_acl_default(struct inode *inode,
const char *name,
const void *value,
size_t size,
int flags)
{
if (strcmp(name, "") != 0)
return -EINVAL;
return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
}
struct xattr_handler ocfs2_xattr_acl_access_handler = {
.prefix = POSIX_ACL_XATTR_ACCESS,
.list = ocfs2_xattr_list_acl_access,
.get = ocfs2_xattr_get_acl_access,
.set = ocfs2_xattr_set_acl_access,
};
struct xattr_handler ocfs2_xattr_acl_default_handler = {
.prefix = POSIX_ACL_XATTR_DEFAULT,
.list = ocfs2_xattr_list_acl_default,
.get = ocfs2_xattr_get_acl_default,
.set = ocfs2_xattr_set_acl_default,
};
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* acl.h
*
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OCFS2_ACL_H
#define OCFS2_ACL_H
#include <linux/posix_acl_xattr.h>
struct ocfs2_acl_entry {
__le16 e_tag;
__le16 e_perm;
__le32 e_id;
};
#ifdef CONFIG_OCFS2_FS_POSIX_ACL
extern int ocfs2_check_acl(struct inode *, int);
extern int ocfs2_acl_chmod(struct inode *);
extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
struct buffer_head *, struct buffer_head *,
struct ocfs2_alloc_context *,
struct ocfs2_alloc_context *);
#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
#define ocfs2_check_acl NULL
static inline int ocfs2_acl_chmod(struct inode *inode)
{
return 0;
}
static inline int ocfs2_init_acl(handle_t *handle,
struct inode *inode,
struct inode *dir,
struct buffer_head *di_bh,
struct buffer_head *dir_bh,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac)
{
return 0;
}
#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
#endif /* OCFS2_ACL_H */
This diff has been collapsed.
......@@ -45,7 +45,9 @@
*
* ocfs2_extent_tree contains info for the root of the b-tree, it must have a
* root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
* functions.
* functions. With metadata ecc, we now call different journal_access
* functions for each type of metadata, so it must have the
* root_journal_access function.
* ocfs2_extent_tree_operations abstract the normal operations we do for
* the root of extent b-tree.
*/
......@@ -54,6 +56,7 @@ struct ocfs2_extent_tree {
struct ocfs2_extent_tree_operations *et_ops;
struct buffer_head *et_root_bh;
struct ocfs2_extent_list *et_root_el;
ocfs2_journal_access_func et_root_journal_access;
void *et_object;
unsigned int et_max_leaf_clusters;
};
......@@ -68,10 +71,18 @@ void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh);
struct ocfs2_xattr_value_buf;
void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct inode *inode,
struct buffer_head *bh,
struct ocfs2_xattr_value_root *xv);
struct ocfs2_xattr_value_buf *vb);
/*
* Read an extent block into *bh. If *bh is NULL, a bh will be
* allocated. This is a cached read. The extent block will be validated
* with ocfs2_validate_extent_block().
*/
int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
struct buffer_head **bh);
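A short usage sketch; everything below except ocfs2_read_extent_block() and brelse() is illustrative:

	struct buffer_head *eb_bh = NULL;
	int ret;

	ret = ocfs2_read_extent_block(inode, eb_blkno, &eb_bh);
	if (ret == 0) {
		struct ocfs2_extent_block *eb =
			(struct ocfs2_extent_block *)eb_bh->b_data;
		/* eb has already passed ocfs2_validate_extent_block() */
		brelse(eb_bh);
	}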
struct ocfs2_alloc_context;
int ocfs2_insert_extent(struct ocfs2_super *osb,
......@@ -110,6 +121,11 @@ int ocfs2_remove_extent(struct inode *inode,
u32 cpos, u32 len, handle_t *handle,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 phys_cpos, u32 len,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct inode *inode,
struct ocfs2_extent_tree *et);
......@@ -167,10 +183,18 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
*/
struct ocfs2_cached_dealloc_ctxt {
struct ocfs2_per_slot_free_list *c_first_suballocator;
struct ocfs2_cached_block_free *c_global_allocator;
};
static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
{
c->c_first_suballocator = NULL;
c->c_global_allocator = NULL;
}
int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
u64 blkno, unsigned int bit);
static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
{
return c->c_global_allocator != NULL;
}
int ocfs2_run_deallocs(struct ocfs2_super *osb,
struct ocfs2_cached_dealloc_ctxt *ctxt);
......
......@@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/pipe_fs_i.h>
#include <linux/mpage.h>
#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_FILE_IO
#include <cluster/masklog.h>
......@@ -68,20 +69,13 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
goto bail;
}
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
status = ocfs2_read_inode_block(inode, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
fe = (struct ocfs2_dinode *) bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
(unsigned long long)le64_to_cpu(fe->i_blkno), 7,
fe->i_signature);
goto bail;
}
if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
le32_to_cpu(fe->i_clusters))) {
mlog(ML_ERROR, "block offset is outside the allocated size: "
......@@ -262,7 +256,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
BUG_ON(!PageLocked(page));
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
......@@ -481,12 +475,6 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
if (ocfs2_should_order_data(inode)) {
ret = ocfs2_jbd2_file_inode(handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
ret = walk_page_buffers(handle,
page_buffers(page),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
if (ret < 0)
mlog_errno(ret);
}
......@@ -1072,15 +1060,8 @@ static void ocfs2_write_failure(struct inode *inode,
tmppage = wc->w_pages[i];
if (page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode)) {
if (ocfs2_should_order_data(inode))
ocfs2_jbd2_file_inode(wc->w_handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
walk_page_buffers(wc->w_handle,
page_buffers(tmppage),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
}
block_commit_write(tmppage, from, to);
}
......@@ -1531,7 +1512,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
goto out;
}
ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
ocfs2_commit_trans(osb, handle);
......@@ -1750,15 +1731,20 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
wc->w_handle = handle;
if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode,
ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) {
ret = -EDQUOT;
goto out_commit;
}
/*
* We don't want this to fail in ocfs2_write_end(), so do it
* here.
*/
ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
goto out_commit;
goto out_quota;
}
/*
......@@ -1771,14 +1757,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
mmap_page);
if (ret) {
mlog_errno(ret);
goto out_commit;
goto out_quota;
}
ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
len);
if (ret) {
mlog_errno(ret);
goto out_commit;
goto out_quota;
}
if (data_ac)
......@@ -1790,6 +1776,10 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
*pagep = wc->w_target_page;
*fsdata = wc;
return 0;
out_quota:
if (clusters_to_alloc)
vfs_dq_free_space(inode,
ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
out_commit:
ocfs2_commit_trans(osb, handle);
......@@ -1919,15 +1909,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
if (page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode)) {
if (ocfs2_should_order_data(inode))
ocfs2_jbd2_file_inode(wc->w_handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
walk_page_buffers(wc->w_handle,
page_buffers(tmppage),
from, to, NULL,
ocfs2_journal_dirty_data);
#endif
}
block_commit_write(tmppage, from, to);
}
}
......
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* blockcheck.c
*
* Checksum and ECC codes for the OCFS2 userspace library.
*
* Copyright (C) 2006, 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License, version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/crc32.h>
#include <linux/buffer_head.h>
#include <linux/bitops.h>
#include <asm/byteorder.h>
#include <cluster/masklog.h>
#include "ocfs2.h"
#include "blockcheck.h"
/*
* We use the following conventions:
*
* d = # data bits
* p = # parity bits
* c = # total code bits (d + p)
*/
/*
* Calculate the bit offset in the hamming code buffer based on the bit's
* offset in the data buffer. Since the hamming code reserves all
* power-of-two bits for parity, the data bit number and the code bit
 * number are offset by all the parity bits beforehand.
*
* Recall that bit numbers in hamming code are 1-based. This function
* takes the 0-based data bit from the caller.
*
* An example. Take bit 1 of the data buffer. 1 is a power of two (2^0),
* so it's a parity bit. 2 is a power of two (2^1), so it's a parity bit.
* 3 is not a power of two. So bit 1 of the data buffer ends up as bit 3
* in the code buffer.
*
* The caller can pass in *p if it wants to keep track of the most recent
* number of parity bits added. This allows the function to start the
* calculation at the last place.
*/
static unsigned int calc_code_bit(unsigned int i, unsigned int *p_cache)
{
unsigned int b, p = 0;
/*
* Data bits are 0-based, but we're talking code bits, which
* are 1-based.
*/
b = i + 1;
/* Use the cache if it is there */
if (p_cache)
p = *p_cache;
b += p;
/*
* For every power of two below our bit number, bump our bit.
*
* We compare with (b + 1) because we have to compare with what b
* would be _if_ it were bumped up by the parity bit. Capice?
*
* p is set above.
*/
for (; (1 << p) < (b + 1); p++)
b++;
if (p_cache)
*p_cache = p;
return b;
}
/*
* This is the low level encoder function. It can be called across
* multiple hunks just like the crc32 code. 'd' is the number of bits
* _in_this_hunk_. nr is the bit offset of this hunk. So, if you had
* two 512B buffers, you would do it like so:
*
* parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
* parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
*
* If you just have one buffer, use ocfs2_hamming_encode_block().
*/
u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d, unsigned int nr)
{
unsigned int i, b, p = 0;
BUG_ON(!d);
/*
* b is the hamming code bit number. Hamming code specifies a
* 1-based array, but C uses 0-based. So 'i' is for C, and 'b' is
* for the algorithm.
*
* The i++ in the for loop is so that the start offset passed
* to ocfs2_find_next_bit_set() is one greater than the previously
* found bit.
*/
for (i = 0; (i = ocfs2_find_next_bit(data, d, i)) < d; i++)
{
/*
* i is the offset in this hunk, nr + i is the total bit
* offset.
*/
b = calc_code_bit(nr + i, &p);
/*
* Data bits in the resultant code are checked by
* parity bits that are part of the bit number
* representation. Huh?
*
* <wikipedia href="http://en.wikipedia.org/wiki/Hamming_code">
* In other words, the parity bit at position 2^k
* checks bits in positions having bit k set in
* their binary representation. Conversely, for
* instance, bit 13, i.e. 1101(2), is checked by
* bits 1000(2) = 8, 0100(2)=4 and 0001(2) = 1.
* </wikipedia>
*
* Note that 'k' is the _code_ bit number. 'b' in
* our loop.
*/
parity ^= b;
}
/* While the data buffer was treated as little endian, the
* return value is in host endian. */
return parity;
}
u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize)
{
return ocfs2_hamming_encode(0, data, blocksize * 8, 0);
}
/*
* Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit
* offset of the current hunk. If bit to be fixed is not part of the
* current hunk, this does nothing.
*
* If you only have one hunk, use ocfs2_hamming_fix_block().
*/
void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
unsigned int fix)
{
unsigned int i, b;
BUG_ON(!d);
/*
* If the bit to fix has an hweight of 1, it's a parity bit. One
* busted parity bit is its own error. Nothing to do here.
*/
if (hweight32(fix) == 1)
return;
/*
* nr + d is the bit right past the data hunk we're looking at.
* If fix after that, nothing to do
*/
if (fix >= calc_code_bit(nr + d, NULL))
return;
/*
* nr is the offset in the data hunk we're starting at. Let's
* start b at the offset in the code buffer. See hamming_encode()
* for a more detailed description of 'b'.
*/
b = calc_code_bit(nr, NULL);
/* If the fix is before this hunk, nothing to do */
if (fix < b)
return;
for (i = 0; i < d; i++, b++)
{
/* Skip past parity bits */
while (hweight32(b) == 1)
b++;
/*
* i is the offset in this data hunk.
* nr + i is the offset in the total data buffer.
* b is the offset in the total code buffer.
*
* Thus, when b == fix, bit i in the current hunk needs
* fixing.
*/
if (b == fix)
{
if (ocfs2_test_bit(i, data))
ocfs2_clear_bit(i, data);
else
ocfs2_set_bit(i, data);
break;
}
}
}
void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
unsigned int fix)
{
ocfs2_hamming_fix(data, blocksize * 8, 0, fix);
}
/*
* This function generates check information for a block.
* data is the block to be checked. bc is a pointer to the
* ocfs2_block_check structure describing the crc32 and the ecc.
*
* bc should be a pointer inside data, as the function will
* take care of zeroing it before calculating the check information. If
* bc does not point inside data, the caller must make sure any inline
* ocfs2_block_check structures are zeroed.
*
* The data buffer must be in on-disk endian (little endian for ocfs2).
* bc will be filled with little-endian values and will be ready to go to
* disk.
*/
void ocfs2_block_check_compute(void *data, size_t blocksize,
struct ocfs2_block_check *bc)
{
u32 crc;
u32 ecc;
memset(bc, 0, sizeof(struct ocfs2_block_check));
crc = crc32_le(~0, data, blocksize);
ecc = ocfs2_hamming_encode_block(data, blocksize);
/*
* No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
* larger than 16 bits.
*/
BUG_ON(ecc > USHORT_MAX);
bc->bc_crc32e = cpu_to_le32(crc);
bc->bc_ecc = cpu_to_le16((u16)ecc);
}
/*
* This function validates existing check information. Like _compute,
* the function will take care of zeroing bc before calculating check codes.
* If bc is not a pointer inside data, the caller must have zeroed any
* inline ocfs2_block_check structures.
*
* Again, the data passed in should be the on-disk endian.
*/
int ocfs2_block_check_validate(void *data, size_t blocksize,
struct ocfs2_block_check *bc)
{
int rc = 0;
struct ocfs2_block_check check;
u32 crc, ecc;
check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
check.bc_ecc = le16_to_cpu(bc->bc_ecc);
memset(bc, 0, sizeof(struct ocfs2_block_check));
/* Fast path - if the crc32 validates, we're good to go */
crc = crc32_le(~0, data, blocksize);
if (crc == check.bc_crc32e)
goto out;
mlog(ML_ERROR,
"CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
/* Ok, try ECC fixups */
ecc = ocfs2_hamming_encode_block(data, blocksize);
ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc);
/* And check the crc32 again */
crc = crc32_le(~0, data, blocksize);
if (crc == check.bc_crc32e)
goto out;
mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
rc = -EIO;
out:
bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
bc->bc_ecc = cpu_to_le16(check.bc_ecc);
return rc;
}
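Putting the two together, callers follow a compute-on-write / validate-on-read pattern roughly like the sketch below; "di" and its i_check field are illustrative stand-ins for any on-disk structure that embeds a struct ocfs2_block_check:

	/* Before the block is written out: */
	ocfs2_block_check_compute(bh->b_data, bh->b_size, &di->i_check);

	/* After the block is read back in: */
	if (ocfs2_block_check_validate(bh->b_data, bh->b_size, &di->i_check))
		return -EIO;	/* crc32 still fails after the one-bit ECC fix */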
/*
* This function generates check information for a list of buffer_heads.
* bhs is the blocks to be checked. bc is a pointer to the
* ocfs2_block_check structure describing the crc32 and the ecc.
*
* bc should be a pointer inside data, as the function will
* take care of zeroing it before calculating the check information. If
* bc does not point inside data, the caller must make sure any inline
* ocfs2_block_check structures are zeroed.
*
* The data buffer must be in on-disk endian (little endian for ocfs2).
* bc will be filled with little-endian values and will be ready to go to
* disk.
*/
void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc)
{
int i;
u32 crc, ecc;
BUG_ON(nr < 0);
if (!nr)
return;
memset(bc, 0, sizeof(struct ocfs2_block_check));
for (i = 0, crc = ~0, ecc = 0; i < nr; i++) {
crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
/*
* The number of bits in a buffer is obviously b_size*8.
* The offset of this buffer is b_size*i, so the bit offset
* of this buffer is b_size*8*i.
*/
ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
bhs[i]->b_size * 8,
bhs[i]->b_size * 8 * i);
}
/*
* No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
* larger than 16 bits.
*/
BUG_ON(ecc > USHORT_MAX);
bc->bc_crc32e = cpu_to_le32(crc);
bc->bc_ecc = cpu_to_le16((u16)ecc);
}
/*
* This function validates existing check information on a list of
* buffer_heads. Like _compute_bhs, the function will take care of
* zeroing bc before calculating check codes. If bc is not a pointer
* inside data, the caller must have zeroed any inline
* ocfs2_block_check structures.
*
* Again, the data passed in should be the on-disk endian.
*/
int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc)
{
int i, rc = 0;
struct ocfs2_block_check check;
u32 crc, ecc, fix;
BUG_ON(nr < 0);
if (!nr)
return 0;
check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
check.bc_ecc = le16_to_cpu(bc->bc_ecc);
memset(bc, 0, sizeof(struct ocfs2_block_check));
/* Fast path - if the crc32 validates, we're good to go */
for (i = 0, crc = ~0; i < nr; i++)
crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
if (crc == check.bc_crc32e)
goto out;
mlog(ML_ERROR,
"CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
/* Ok, try ECC fixups */
for (i = 0, ecc = 0; i < nr; i++) {
/*
* The number of bits in a buffer is obviously b_size*8.
* The offset of this buffer is b_size*i, so the bit offset
* of this buffer is b_size*8*i.
*/
ecc = (u16)ocfs2_hamming_encode(ecc, bhs[i]->b_data,
bhs[i]->b_size * 8,
bhs[i]->b_size * 8 * i);
}
fix = ecc ^ check.bc_ecc;
for (i = 0; i < nr; i++) {
/*
* Try the fix against each buffer. It will only affect
* one of them.
*/
ocfs2_hamming_fix(bhs[i]->b_data, bhs[i]->b_size * 8,
bhs[i]->b_size * 8 * i, fix);
}
/* And check the crc32 again */
for (i = 0, crc = ~0; i < nr; i++)
crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
if (crc == check.bc_crc32e)
goto out;
mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
rc = -EIO;
out:
bc->bc_crc32e = cpu_to_le32(check.bc_crc32e);
bc->bc_ecc = cpu_to_le16(check.bc_ecc);
return rc;
}
/*
* These are the main API. They check the superblock flag before
* calling the underlying operations.
*
* They expect the buffer(s) to be in disk format.
*/
void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
struct ocfs2_block_check *bc)
{
if (ocfs2_meta_ecc(OCFS2_SB(sb)))
ocfs2_block_check_compute(data, sb->s_blocksize, bc);
}
int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
struct ocfs2_block_check *bc)
{
int rc = 0;
if (ocfs2_meta_ecc(OCFS2_SB(sb)))
rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc);
return rc;
}
void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc)
{
if (ocfs2_meta_ecc(OCFS2_SB(sb)))
ocfs2_block_check_compute_bhs(bhs, nr, bc);
}
int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc)
{
int rc = 0;
if (ocfs2_meta_ecc(OCFS2_SB(sb)))
rc = ocfs2_block_check_validate_bhs(bhs, nr, bc);
return rc;
}
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* blockcheck.h
*
* Checksum and ECC codes for the OCFS2 userspace library.
*
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License, version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef OCFS2_BLOCKCHECK_H
#define OCFS2_BLOCKCHECK_H
/* High level block API */
void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
struct ocfs2_block_check *bc);
int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
struct ocfs2_block_check *bc);
void ocfs2_compute_meta_ecc_bhs(struct super_block *sb,
struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc);
int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc);
/* Lower level API */
void ocfs2_block_check_compute(void *data, size_t blocksize,
struct ocfs2_block_check *bc);
int ocfs2_block_check_validate(void *data, size_t blocksize,
struct ocfs2_block_check *bc);
void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc);
int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc);
/*
* Hamming code functions
*/
/*
* Encoding hamming code parity bits for a buffer.
*
* This is the low level encoder function. It can be called across
* multiple hunks just like the crc32 code. 'd' is the number of bits
* _in_this_hunk_. nr is the bit offset of this hunk. So, if you had
* two 512B buffers, you would do it like so:
*
* parity = ocfs2_hamming_encode(0, buf1, 512 * 8, 0);
* parity = ocfs2_hamming_encode(parity, buf2, 512 * 8, 512 * 8);
*
* If you just have one buffer, use ocfs2_hamming_encode_block().
*/
u32 ocfs2_hamming_encode(u32 parity, void *data, unsigned int d,
unsigned int nr);
/*
* Fix a buffer with a bit error. The 'fix' is the original parity
* xor'd with the parity calculated now.
*
* Like ocfs2_hamming_encode(), this can handle hunks. nr is the bit
* offset of the current hunk. If bit to be fixed is not part of the
* current hunk, this does nothing.
*
* If you only have one buffer, use ocfs2_hamming_fix_block().
*/
void ocfs2_hamming_fix(void *data, unsigned int d, unsigned int nr,
unsigned int fix);
/* Convenience wrappers for a single buffer of data */
extern u32 ocfs2_hamming_encode_block(void *data, unsigned int blocksize);
extern void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
unsigned int fix);
#endif
......@@ -39,6 +39,18 @@
#include "buffer_head_io.h"
/*
* Bits on bh->b_state used by ocfs2.
*
* These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart.
*/
enum ocfs2_state_bits {
BH_NeedsValidate = BH_JBDPrivateStart,
};
/* Expand the magic b_state functions */
BUFFER_FNS(NeedsValidate, needs_validate);
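For reference, BUFFER_FNS() comes from <linux/buffer_head.h> and generates the helpers used later in this file; roughly (a sketch, not the literal expansion):

	static inline void set_buffer_needs_validate(struct buffer_head *bh)
	{
		set_bit(BH_NeedsValidate, &bh->b_state);
	}
	static inline void clear_buffer_needs_validate(struct buffer_head *bh)
	{
		clear_bit(BH_NeedsValidate, &bh->b_state);
	}
	static inline int buffer_needs_validate(const struct buffer_head *bh)
	{
		return test_bit(BH_NeedsValidate, &bh->b_state);
	}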
int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
struct inode *inode)
{
......@@ -166,7 +178,9 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
}
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
struct buffer_head *bhs[], int flags)
struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb,
struct buffer_head *bh))
{
int status = 0;
int i, ignore_cache = 0;
......@@ -298,6 +312,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
if (validate)
set_buffer_needs_validate(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
continue;
......@@ -328,6 +344,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
bhs[i] = NULL;
continue;
}
if (buffer_needs_validate(bh)) {
/* We never set NeedsValidate if the
* buffer was held by the journal, so
* that better not have changed */
BUG_ON(buffer_jbd(bh));
clear_buffer_needs_validate(bh);
status = validate(inode->i_sb, bh);
if (status) {
put_bh(bh);
bhs[i] = NULL;
continue;
}
}
}
/* Always set the buffer in the cache, even if it was
......
......@@ -31,21 +31,24 @@
void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
int uptodate);
static inline int ocfs2_read_block(struct inode *inode,
u64 off,
struct buffer_head **bh);
int ocfs2_write_block(struct ocfs2_super *osb,
struct buffer_head *bh,
struct inode *inode);
int ocfs2_read_blocks(struct inode *inode,
u64 block,
int nr,
struct buffer_head *bhs[],
int flags);
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
unsigned int nr, struct buffer_head *bhs[]);
/*
* If not NULL, validate() will be called on a buffer that is freshly
* read from disk. It will not be called if the buffer was in cache.
* Note that if validate() is being used for this buffer, it needs to
* be set even for a READAHEAD call, as it marks the buffer for later
* validation.
*/
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb,
struct buffer_head *bh));
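A sketch of such a callback (the name and locals are hypothetical; the real callers added in this series pass helpers like the dinode validator behind ocfs2_read_inode_block()):

	static int my_validate_block(struct super_block *sb, struct buffer_head *bh)
	{
		/* Signature or checksum checks go here. A non-zero return makes
		 * ocfs2_read_blocks() drop the buffer and fail the read. */
		return 0;
	}

	/* A cached read of one block, validated only on the uncached path: */
	status = ocfs2_read_block(inode, blkno, &bh, my_validate_block);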
int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
struct buffer_head *bh);
......@@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
#define OCFS2_BH_READAHEAD 8
static inline int ocfs2_read_block(struct inode *inode, u64 off,
struct buffer_head **bh)
struct buffer_head **bh,
int (*validate)(struct super_block *sb,
struct buffer_head *bh))
{
int status = 0;
......@@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
goto bail;
}
status = ocfs2_read_blocks(inode, off, 1, bh, 0);
status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
bail:
return status;
......
......@@ -110,6 +110,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
define_mask(QUORUM),
define_mask(EXPORT),
define_mask(XATTR),
define_mask(QUOTA),
define_mask(ERROR),
define_mask(NOTICE),
define_mask(KTHREAD),
......
......@@ -113,6 +113,7 @@
#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
/* bits that are infrequently given and frequently matched in the high word */
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */
......
This diff has been collapsed.
......@@ -83,4 +83,6 @@ int ocfs2_fill_new_dir(struct ocfs2_super *osb,
struct buffer_head *fe_bh,
struct ocfs2_alloc_context *data_ac);
struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
void *data);
#endif /* OCFS2_DIR_H */
......@@ -275,6 +275,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
struct list_head *iter, *head=NULL;
u64 cookie;
u32 flags;
u8 node;
if (!dlm_grab(dlm)) {
dlm_error(DLM_REJECTED);
......@@ -286,18 +287,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
name = past->name;
locklen = past->namelen;
cookie = be64_to_cpu(past->cookie);
cookie = past->cookie;
flags = be32_to_cpu(past->flags);
node = past->node_idx;
if (locklen > DLM_LOCKID_NAME_MAX) {
ret = DLM_IVBUFLEN;
mlog(ML_ERROR, "Invalid name length in proxy ast handler!\n");
mlog(ML_ERROR, "Invalid name length (%d) in proxy ast "
"handler!\n", locklen);
goto leave;
}
if ((flags & (LKM_PUT_LVB|LKM_GET_LVB)) ==
(LKM_PUT_LVB|LKM_GET_LVB)) {
mlog(ML_ERROR, "both PUT and GET lvb specified\n");
mlog(ML_ERROR, "Both PUT and GET lvb specified, (0x%x)\n",
flags);
ret = DLM_BADARGS;
goto leave;
}
......@@ -310,22 +314,21 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
if (past->type != DLM_AST &&
past->type != DLM_BAST) {
mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
"name=%.*s\n", past->type,
dlm_get_lock_cookie_node(cookie),
dlm_get_lock_cookie_seq(cookie),
locklen, name);
"name=%.*s, node=%u\n", past->type,
dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
locklen, name, node);
ret = DLM_IVLOCKID;
goto leave;
}
res = dlm_lookup_lockres(dlm, name, locklen);
if (!res) {
mlog(0, "got %sast for unknown lockres! "
"cookie=%u:%llu, name=%.*s, namelen=%u\n",
past->type == DLM_AST ? "" : "b",
dlm_get_lock_cookie_node(cookie),
dlm_get_lock_cookie_seq(cookie),
locklen, name, locklen);
mlog(0, "Got %sast for unknown lockres! cookie=%u:%llu, "
"name=%.*s, node=%u\n", (past->type == DLM_AST ? "" : "b"),
dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
locklen, name, node);
ret = DLM_IVLOCKID;
goto leave;
}
......@@ -337,12 +340,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
spin_lock(&res->spinlock);
if (res->state & DLM_LOCK_RES_RECOVERING) {
mlog(0, "responding with DLM_RECOVERING!\n");
mlog(0, "Responding with DLM_RECOVERING!\n");
ret = DLM_RECOVERING;
goto unlock_out;
}
if (res->state & DLM_LOCK_RES_MIGRATING) {
mlog(0, "responding with DLM_MIGRATING!\n");
mlog(0, "Responding with DLM_MIGRATING!\n");
ret = DLM_MIGRATING;
goto unlock_out;
}
......@@ -351,7 +354,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
lock = NULL;
list_for_each(iter, head) {
lock = list_entry (iter, struct dlm_lock, list);
if (be64_to_cpu(lock->ml.cookie) == cookie)
if (lock->ml.cookie == cookie)
goto do_ast;
}
......@@ -363,15 +366,15 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
list_for_each(iter, head) {
lock = list_entry (iter, struct dlm_lock, list);
if (be64_to_cpu(lock->ml.cookie) == cookie)
if (lock->ml.cookie == cookie)
goto do_ast;
}
mlog(0, "got %sast for unknown lock! cookie=%u:%llu, "
"name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b",
dlm_get_lock_cookie_node(cookie),
dlm_get_lock_cookie_seq(cookie),
locklen, name, locklen);
mlog(0, "Got %sast for unknown lock! cookie=%u:%llu, name=%.*s, "
"node=%u\n", past->type == DLM_AST ? "" : "b",
dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
locklen, name, node);
ret = DLM_NORMAL;
unlock_out:
......@@ -383,7 +386,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
if (past->type == DLM_AST) {
/* do not alter lock refcount. switching lists. */
list_move_tail(&lock->list, &res->granted);
mlog(0, "ast: adding to granted list... type=%d, "
mlog(0, "ast: Adding to granted list... type=%d, "
"convert_type=%d\n", lock->ml.type, lock->ml.convert_type);
if (lock->ml.convert_type != LKM_IVMODE) {
lock->ml.type = lock->ml.convert_type;
......@@ -408,7 +411,6 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
dlm_do_local_bast(dlm, res, lock, past->blocked_type);
leave:
if (res)
dlm_lockres_put(res);
......
......@@ -140,6 +140,7 @@ struct dlm_ctxt
unsigned int purge_count;
spinlock_t spinlock;
spinlock_t ast_lock;
spinlock_t track_lock;
char *name;
u8 node_num;
u32 key;
......@@ -316,6 +317,8 @@ struct dlm_lock_resource
* put on a list for the dlm thread to run. */
unsigned long last_used;
struct dlm_ctxt *dlm;
unsigned migration_pending:1;
atomic_t asts_reserved;
spinlock_t spinlock;
......
......@@ -630,43 +630,38 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos)
{
struct debug_lockres *dl = m->private;
struct dlm_ctxt *dlm = dl->dl_ctxt;
struct dlm_lock_resource *oldres = dl->dl_res;
struct dlm_lock_resource *res = NULL;
struct list_head *track_list;
spin_lock(&dlm->spinlock);
spin_lock(&dlm->track_lock);
if (oldres)
track_list = &oldres->tracking;
else
track_list = &dlm->tracking_list;
if (dl->dl_res) {
list_for_each_entry(res, &dl->dl_res->tracking, tracking) {
if (dl->dl_res) {
dlm_lockres_put(dl->dl_res);
dl->dl_res = NULL;
}
if (&res->tracking == &dlm->tracking_list) {
mlog(0, "End of list found, %p\n", res);
dl = NULL;
break;
}
list_for_each_entry(res, track_list, tracking) {
if (&res->tracking == &dlm->tracking_list)
res = NULL;
else
dlm_lockres_get(res);
dl->dl_res = res;
break;
}
} else {
if (!list_empty(&dlm->tracking_list)) {
list_for_each_entry(res, &dlm->tracking_list, tracking)
break;
dlm_lockres_get(res);
spin_unlock(&dlm->track_lock);
if (oldres)
dlm_lockres_put(oldres);
dl->dl_res = res;
if (res) {
spin_lock(&res->spinlock);
dump_lockres(res, dl->dl_buf, dl->dl_len - 1);
spin_unlock(&res->spinlock);
} else
dl = NULL;
}
if (dl) {
spin_lock(&dl->dl_res->spinlock);
dump_lockres(dl->dl_res, dl->dl_buf, dl->dl_len - 1);
spin_unlock(&dl->dl_res->spinlock);
}
spin_unlock(&dlm->spinlock);
/* passed to seq_show */
return dl;
}
......
......@@ -1550,6 +1550,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
spin_lock_init(&dlm->spinlock);
spin_lock_init(&dlm->master_lock);
spin_lock_init(&dlm->ast_lock);
spin_lock_init(&dlm->track_lock);
INIT_LIST_HEAD(&dlm->list);
INIT_LIST_HEAD(&dlm->dirty_list);
INIT_LIST_HEAD(&dlm->reco.resources);
......
......@@ -505,8 +505,10 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
static void dlm_lockres_release(struct kref *kref)
{
struct dlm_lock_resource *res;
struct dlm_ctxt *dlm;
res = container_of(kref, struct dlm_lock_resource, refs);
dlm = res->dlm;
/* This should not happen -- all lockres' have a name
* associated with them at init time. */
......@@ -515,6 +517,7 @@ static void dlm_lockres_release(struct kref *kref)
mlog(0, "destroying lockres %.*s\n", res->lockname.len,
res->lockname.name);
spin_lock(&dlm->track_lock);
if (!list_empty(&res->tracking))
list_del_init(&res->tracking);
else {
......@@ -522,6 +525,9 @@ static void dlm_lockres_release(struct kref *kref)
res->lockname.len, res->lockname.name);
dlm_print_one_lock_resource(res);
}
spin_unlock(&dlm->track_lock);
dlm_put(dlm);
if (!hlist_unhashed(&res->hash_node) ||
!list_empty(&res->granted) ||
......@@ -595,6 +601,10 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
res->migration_pending = 0;
res->inflight_locks = 0;
/* put in dlm_lockres_release */
dlm_grab(dlm);
res->dlm = dlm;
kref_init(&res->refs);
/* just for consistency */
......@@ -722,14 +732,21 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
if (tmpres) {
int dropping_ref = 0;
spin_unlock(&dlm->spinlock);
spin_lock(&tmpres->spinlock);
/* We wait for the other thread that is mastering the resource */
if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
__dlm_wait_on_lockres(tmpres);
BUG_ON(tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN);
}
if (tmpres->owner == dlm->node_num) {
BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
dlm_lockres_grab_inflight_ref(dlm, tmpres);
} else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF)
dropping_ref = 1;
spin_unlock(&tmpres->spinlock);
spin_unlock(&dlm->spinlock);
/* wait until done messaging the master, drop our ref to allow
* the lockres to be purged, start over. */
......@@ -2949,7 +2966,7 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
struct dlm_node_iter *iter)
{
struct dlm_migrate_request migrate;
int ret, status = 0;
int ret, skip, status = 0;
int nodenum;
memset(&migrate, 0, sizeof(migrate));
......@@ -2966,12 +2983,27 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
nodenum == new_master)
continue;
/* We could race exit domain. If exited, skip. */
spin_lock(&dlm->spinlock);
skip = (!test_bit(nodenum, dlm->domain_map));
spin_unlock(&dlm->spinlock);
if (skip) {
clear_bit(nodenum, iter->node_map);
continue;
}
ret = o2net_send_message(DLM_MIGRATE_REQUEST_MSG, dlm->key,
&migrate, sizeof(migrate), nodenum,
&status);
if (ret < 0)
mlog_errno(ret);
else if (status < 0) {
if (ret < 0) {
mlog(0, "migrate_request returned %d!\n", ret);
if (!dlm_is_host_down(ret)) {
mlog(ML_ERROR, "unhandled error=%d!\n", ret);
BUG();
}
clear_bit(nodenum, iter->node_map);
ret = 0;
} else if (status < 0) {
mlog(0, "migrate request (node %u) returned %d!\n",
nodenum, status);
ret = status;
......
......@@ -181,7 +181,8 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
/* This ensures that clear refmap is sent after the set */
__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
__dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG |
DLM_LOCK_RES_MIGRATING));
spin_unlock(&res->spinlock);
/* clear our bit from the master's refmap, ignore errors */
......
......@@ -49,6 +49,19 @@ struct ocfs2_meta_lvb {
__be32 lvb_reserved2;
};
#define OCFS2_QINFO_LVB_VERSION 1
struct ocfs2_qinfo_lvb {
__u8 lvb_version;
__u8 lvb_reserved[3];
__be32 lvb_bgrace;
__be32 lvb_igrace;
__be32 lvb_syncms;
__be32 lvb_blocks;
__be32 lvb_free_blk;
__be32 lvb_free_entry;
};
/* ocfs2_inode_lock_full() 'arg_flags' flags */
/* don't wait on recovery. */
#define OCFS2_META_LOCK_RECOVERY (0x01)
......@@ -69,6 +82,9 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
struct ocfs2_file_private;
void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_file_private *fp);
struct ocfs2_mem_dqinfo;
void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_mem_dqinfo *info);
void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
int ocfs2_create_new_inode_locks(struct inode *inode);
int ocfs2_drop_inode_locks(struct inode *inode);
......@@ -103,6 +119,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex);
void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
int ocfs2_file_lock(struct file *file, int ex, int trylock);
void ocfs2_file_unlock(struct file *file);
int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
......
......@@ -57,4 +57,28 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
u32 *p_cluster, u32 *num_clusters,
struct ocfs2_extent_list *el);
int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb,
struct buffer_head *bh));
static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
struct buffer_head **bh,
int (*validate)(struct super_block *sb,
struct buffer_head *bh))
{
int status = 0;
if (bh == NULL) {
printk("ocfs2: bh == NULL\n");
status = -EINVAL;
goto bail;
}
status = ocfs2_read_virt_blocks(inode, v_block, 1, bh, 0, validate);
bail:
return status;
}
#endif /* _EXTENT_MAP_H */
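The inline wrapper above reads a single virtual block and passes an optional validator callback down to ocfs2_read_virt_blocks. A usage sketch; my_validate_block stands in for a real validator and is hypothetical:

	/* Sketch: read virtual block 0 of an inode with a validator callback. */
	struct buffer_head *bh = NULL;
	int ret = ocfs2_read_virt_block(inode, 0, &bh, my_validate_block);
	if (ret == 0) {
		/* ... examine bh->b_data ... */
		brelse(bh);
	} else
		mlog_errno(ret);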
......@@ -51,6 +51,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
int ocfs2_simple_size_update(struct inode *inode,
struct buffer_head *di_bh,
u64 new_i_size);
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
u64 zero_to);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
......
......@@ -128,7 +128,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
int sysfile_type);
int ocfs2_inode_init_private(struct inode *inode);
int ocfs2_inode_revalidate(struct dentry *dentry);
int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
int create_ino);
void ocfs2_read_inode(struct inode *inode);
void ocfs2_read_inode2(struct inode *inode, void *opaque);
......@@ -142,6 +142,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
struct buffer_head *bh);
int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
struct buffer_head *ocfs2_bread(struct inode *inode,
int block, int *err, int reada);
void ocfs2_set_inode_flags(struct inode *inode);
void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
......@@ -153,4 +155,16 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits);
}
/* Validate that a bh contains a valid inode */
int ocfs2_validate_inode_block(struct super_block *sb,
struct buffer_head *bh);
/*
* Read an inode block into *bh. If *bh is NULL, a bh will be allocated.
* This is a cached read. The inode will be validated with
* ocfs2_validate_inode_block().
*/
int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
/* The same, but can be passed OCFS2_BH_* flags */
int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
int flags);
#endif /* OCFS2_INODE_H */
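The declarations above pair ocfs2_validate_inode_block with cached read helpers for the inode's dinode block. A short usage sketch (the surrounding error handling is illustrative):

	/* Sketch: cached, validated read of an inode's dinode block. */
	struct buffer_head *di_bh = NULL;
	struct ocfs2_dinode *di;
	int status;

	status = ocfs2_read_inode_block(inode, &di_bh);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}
	di = (struct ocfs2_dinode *)di_bh->b_data;
	/* ... use di ... */
	brelse(di_bh);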
......@@ -46,6 +46,7 @@ enum ocfs2_lock_type {
OCFS2_LOCK_TYPE_DENTRY,
OCFS2_LOCK_TYPE_OPEN,
OCFS2_LOCK_TYPE_FLOCK,
OCFS2_LOCK_TYPE_QINFO,
OCFS2_NUM_LOCK_TYPES
};
......@@ -77,6 +78,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
case OCFS2_LOCK_TYPE_FLOCK:
c = 'F';
break;
case OCFS2_LOCK_TYPE_QINFO:
c = 'Q';
break;
default:
c = '\0';
}
......@@ -95,6 +99,7 @@ static char *ocfs2_lock_type_strings[] = {
[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
[OCFS2_LOCK_TYPE_OPEN] = "Open",
[OCFS2_LOCK_TYPE_FLOCK] = "Flock",
[OCFS2_LOCK_TYPE_QINFO] = "Quota",
};
static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
......
......@@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
* this is not true, the read of -1 (UINT64_MAX) will fail.
*/
ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
OCFS2_BH_IGNORE_CACHE);
OCFS2_BH_IGNORE_CACHE, NULL);
if (ret == 0) {
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
......@@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
bh = NULL; /* Acquire a fresh bh */
status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
OCFS2_BH_IGNORE_CACHE);
OCFS2_BH_IGNORE_CACHE, NULL);
if (status < 0) {
mlog_errno(status);
goto bail;
......
......@@ -84,7 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
mlog_entry_void();
status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh);
status = ocfs2_read_inode_block(inode, bh);
if (status < 0) {
mlog_errno(status);
link = ERR_PTR(status);
......