提交 d2dd328b 作者: Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (27 commits)
  block: make blk_init_free_list and elevator_init idempotent
  block: avoid unconditionally freeing previously allocated request_queue
  pipe: change /proc/sys/fs/pipe-max-pages to byte sized interface
  pipe: change the privilege required for growing a pipe beyond system max
  pipe: adjust minimum pipe size to 1 page
  block: disable preemption before using sched_clock()
  cciss: call BUG() earlier
  Preparing 8.3.8rc2
  drbd: Reduce verbosity
  drbd: use drbd specific ratelimit instead of global printk_ratelimit
  drbd: fix hang on local read errors while disconnected
  drbd: Removed the now empty w_io_error() function
  drbd: removed duplicated #includes
  drbd: improve usage of MSG_MORE
  drbd: need to set socket bufsize early to take effect
  drbd: improve network latency, TCP_QUICKACK
  drbd: Revert "drbd: Create new current UUID as late as possible"
  brd: support discard
  Revert "writeback: fix WB_SYNC_NONE writeback from umount"
  Revert "writeback: ensure that WB_SYNC_NONE writeback with sb pinned is sync"
  ...
...@@ -467,6 +467,9 @@ static int blk_init_free_list(struct request_queue *q) ...@@ -467,6 +467,9 @@ static int blk_init_free_list(struct request_queue *q)
{ {
struct request_list *rl = &q->rq; struct request_list *rl = &q->rq;
if (unlikely(rl->rq_pool))
return 0;
rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0;
rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0;
rl->elvpriv = 0; rl->elvpriv = 0;
...@@ -570,9 +573,17 @@ EXPORT_SYMBOL(blk_init_queue); ...@@ -570,9 +573,17 @@ EXPORT_SYMBOL(blk_init_queue);
struct request_queue * struct request_queue *
blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
{ {
struct request_queue *q = blk_alloc_queue_node(GFP_KERNEL, node_id); struct request_queue *uninit_q, *q;
uninit_q = blk_alloc_queue_node(GFP_KERNEL, node_id);
if (!uninit_q)
return NULL;
q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id);
if (!q)
blk_cleanup_queue(uninit_q);
return blk_init_allocated_queue_node(q, rfn, lock, node_id); return q;
} }
EXPORT_SYMBOL(blk_init_queue_node); EXPORT_SYMBOL(blk_init_queue_node);
...@@ -592,10 +603,8 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, ...@@ -592,10 +603,8 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
return NULL; return NULL;
q->node = node_id; q->node = node_id;
if (blk_init_free_list(q)) { if (blk_init_free_list(q))
kmem_cache_free(blk_requestq_cachep, q);
return NULL; return NULL;
}
q->request_fn = rfn; q->request_fn = rfn;
q->prep_rq_fn = NULL; q->prep_rq_fn = NULL;
...@@ -618,7 +627,6 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, ...@@ -618,7 +627,6 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
return q; return q;
} }
blk_put_queue(q);
return NULL; return NULL;
} }
EXPORT_SYMBOL(blk_init_allocated_queue_node); EXPORT_SYMBOL(blk_init_allocated_queue_node);
......
...@@ -64,6 +64,9 @@ static DEFINE_PER_CPU(unsigned long, cfq_ioc_count); ...@@ -64,6 +64,9 @@ static DEFINE_PER_CPU(unsigned long, cfq_ioc_count);
static struct completion *ioc_gone; static struct completion *ioc_gone;
static DEFINE_SPINLOCK(ioc_gone_lock); static DEFINE_SPINLOCK(ioc_gone_lock);
static DEFINE_SPINLOCK(cic_index_lock);
static DEFINE_IDA(cic_index_ida);
#define CFQ_PRIO_LISTS IOPRIO_BE_NR #define CFQ_PRIO_LISTS IOPRIO_BE_NR
#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
...@@ -271,6 +274,7 @@ struct cfq_data { ...@@ -271,6 +274,7 @@ struct cfq_data {
unsigned int cfq_latency; unsigned int cfq_latency;
unsigned int cfq_group_isolation; unsigned int cfq_group_isolation;
unsigned int cic_index;
struct list_head cic_list; struct list_head cic_list;
/* /*
...@@ -430,6 +434,24 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic, ...@@ -430,6 +434,24 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
cic->cfqq[is_sync] = cfqq; cic->cfqq[is_sync] = cfqq;
} }
/*
 * A cic whose queue has gone away keeps a tagged value in ->key instead of
 * a cfq_data pointer: bit 0 is the dead mark, and the bits above it hold
 * the cfqd's cic_index.
 */
#define CIC_DEAD_KEY 1ul
#define CIC_DEAD_INDEX_SHIFT 1

/* Build the tagged "dead" key value for the given cfqd. */
static inline void *cfqd_dead_key(struct cfq_data *cfqd)
{
	unsigned long shifted_index = cfqd->cic_index << CIC_DEAD_INDEX_SHIFT;

	return (void *)(shifted_index | CIC_DEAD_KEY);
}
/*
 * Return the cfq_data stored in cic->key, or NULL when the key carries the
 * dead mark (CIC_DEAD_KEY) rather than a live pointer.
 */
static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic)
{
	/* Read ->key exactly once so the test and the result agree. */
	struct cfq_data *owner = cic->key;

	return unlikely((unsigned long) owner & CIC_DEAD_KEY) ? NULL : owner;
}
/* /*
* We regard a request as SYNC, if it's either a read or has the SYNC bit * We regard a request as SYNC, if it's either a read or has the SYNC bit
* set (in which case it could also be direct WRITE). * set (in which case it could also be direct WRITE).
...@@ -2510,11 +2532,12 @@ static void cfq_cic_free(struct cfq_io_context *cic) ...@@ -2510,11 +2532,12 @@ static void cfq_cic_free(struct cfq_io_context *cic)
static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic) static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
{ {
unsigned long flags; unsigned long flags;
unsigned long dead_key = (unsigned long) cic->key;
BUG_ON(!cic->dead_key); BUG_ON(!(dead_key & CIC_DEAD_KEY));
spin_lock_irqsave(&ioc->lock, flags); spin_lock_irqsave(&ioc->lock, flags);
radix_tree_delete(&ioc->radix_root, cic->dead_key); radix_tree_delete(&ioc->radix_root, dead_key >> CIC_DEAD_INDEX_SHIFT);
hlist_del_rcu(&cic->cic_list); hlist_del_rcu(&cic->cic_list);
spin_unlock_irqrestore(&ioc->lock, flags); spin_unlock_irqrestore(&ioc->lock, flags);
...@@ -2537,15 +2560,10 @@ static void cfq_free_io_context(struct io_context *ioc) ...@@ -2537,15 +2560,10 @@ static void cfq_free_io_context(struct io_context *ioc)
__call_for_each_cic(ioc, cic_free_func); __call_for_each_cic(ioc, cic_free_func);
} }
static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) static void cfq_put_cooperator(struct cfq_queue *cfqq)
{ {
struct cfq_queue *__cfqq, *next; struct cfq_queue *__cfqq, *next;
if (unlikely(cfqq == cfqd->active_queue)) {
__cfq_slice_expired(cfqd, cfqq, 0);
cfq_schedule_dispatch(cfqd);
}
/* /*
* If this queue was scheduled to merge with another queue, be * If this queue was scheduled to merge with another queue, be
* sure to drop the reference taken on that queue (and others in * sure to drop the reference taken on that queue (and others in
...@@ -2561,6 +2579,16 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) ...@@ -2561,6 +2579,16 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
cfq_put_queue(__cfqq); cfq_put_queue(__cfqq);
__cfqq = next; __cfqq = next;
} }
}
static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
if (unlikely(cfqq == cfqd->active_queue)) {
__cfq_slice_expired(cfqd, cfqq, 0);
cfq_schedule_dispatch(cfqd);
}
cfq_put_cooperator(cfqq);
cfq_put_queue(cfqq); cfq_put_queue(cfqq);
} }
...@@ -2573,11 +2601,10 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, ...@@ -2573,11 +2601,10 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
list_del_init(&cic->queue_list); list_del_init(&cic->queue_list);
/* /*
* Make sure key == NULL is seen for dead queues * Make sure dead mark is seen for dead queues
*/ */
smp_wmb(); smp_wmb();
cic->dead_key = (unsigned long) cic->key; cic->key = cfqd_dead_key(cfqd);
cic->key = NULL;
if (ioc->ioc_data == cic) if (ioc->ioc_data == cic)
rcu_assign_pointer(ioc->ioc_data, NULL); rcu_assign_pointer(ioc->ioc_data, NULL);
...@@ -2596,7 +2623,7 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, ...@@ -2596,7 +2623,7 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd,
static void cfq_exit_single_io_context(struct io_context *ioc, static void cfq_exit_single_io_context(struct io_context *ioc,
struct cfq_io_context *cic) struct cfq_io_context *cic)
{ {
struct cfq_data *cfqd = cic->key; struct cfq_data *cfqd = cic_to_cfqd(cic);
if (cfqd) { if (cfqd) {
struct request_queue *q = cfqd->queue; struct request_queue *q = cfqd->queue;
...@@ -2609,7 +2636,7 @@ static void cfq_exit_single_io_context(struct io_context *ioc, ...@@ -2609,7 +2636,7 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
* race between exiting task and queue * race between exiting task and queue
*/ */
smp_read_barrier_depends(); smp_read_barrier_depends();
if (cic->key) if (cic->key == cfqd)
__cfq_exit_single_io_context(cfqd, cic); __cfq_exit_single_io_context(cfqd, cic);
spin_unlock_irqrestore(q->queue_lock, flags); spin_unlock_irqrestore(q->queue_lock, flags);
...@@ -2689,7 +2716,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc) ...@@ -2689,7 +2716,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic)
{ {
struct cfq_data *cfqd = cic->key; struct cfq_data *cfqd = cic_to_cfqd(cic);
struct cfq_queue *cfqq; struct cfq_queue *cfqq;
unsigned long flags; unsigned long flags;
...@@ -2746,7 +2773,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq, ...@@ -2746,7 +2773,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic) static void changed_cgroup(struct io_context *ioc, struct cfq_io_context *cic)
{ {
struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1); struct cfq_queue *sync_cfqq = cic_to_cfqq(cic, 1);
struct cfq_data *cfqd = cic->key; struct cfq_data *cfqd = cic_to_cfqd(cic);
unsigned long flags; unsigned long flags;
struct request_queue *q; struct request_queue *q;
...@@ -2883,12 +2910,13 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc, ...@@ -2883,12 +2910,13 @@ cfq_drop_dead_cic(struct cfq_data *cfqd, struct io_context *ioc,
unsigned long flags; unsigned long flags;
WARN_ON(!list_empty(&cic->queue_list)); WARN_ON(!list_empty(&cic->queue_list));
BUG_ON(cic->key != cfqd_dead_key(cfqd));
spin_lock_irqsave(&ioc->lock, flags); spin_lock_irqsave(&ioc->lock, flags);
BUG_ON(ioc->ioc_data == cic); BUG_ON(ioc->ioc_data == cic);
radix_tree_delete(&ioc->radix_root, (unsigned long) cfqd); radix_tree_delete(&ioc->radix_root, cfqd->cic_index);
hlist_del_rcu(&cic->cic_list); hlist_del_rcu(&cic->cic_list);
spin_unlock_irqrestore(&ioc->lock, flags); spin_unlock_irqrestore(&ioc->lock, flags);
...@@ -2900,7 +2928,6 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) ...@@ -2900,7 +2928,6 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
{ {
struct cfq_io_context *cic; struct cfq_io_context *cic;
unsigned long flags; unsigned long flags;
void *k;
if (unlikely(!ioc)) if (unlikely(!ioc))
return NULL; return NULL;
...@@ -2917,13 +2944,11 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc) ...@@ -2917,13 +2944,11 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
} }
do { do {
cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd); cic = radix_tree_lookup(&ioc->radix_root, cfqd->cic_index);
rcu_read_unlock(); rcu_read_unlock();
if (!cic) if (!cic)
break; break;
/* ->key must be copied to avoid race with cfq_exit_queue() */ if (unlikely(cic->key != cfqd)) {
k = cic->key;
if (unlikely(!k)) {
cfq_drop_dead_cic(cfqd, ioc, cic); cfq_drop_dead_cic(cfqd, ioc, cic);
rcu_read_lock(); rcu_read_lock();
continue; continue;
...@@ -2956,7 +2981,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, ...@@ -2956,7 +2981,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc,
spin_lock_irqsave(&ioc->lock, flags); spin_lock_irqsave(&ioc->lock, flags);
ret = radix_tree_insert(&ioc->radix_root, ret = radix_tree_insert(&ioc->radix_root,
(unsigned long) cfqd, cic); cfqd->cic_index, cic);
if (!ret) if (!ret)
hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list); hlist_add_head_rcu(&cic->cic_list, &ioc->cic_list);
spin_unlock_irqrestore(&ioc->lock, flags); spin_unlock_irqrestore(&ioc->lock, flags);
...@@ -3516,6 +3541,9 @@ split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq) ...@@ -3516,6 +3541,9 @@ split_cfqq(struct cfq_io_context *cic, struct cfq_queue *cfqq)
} }
cic_set_cfqq(cic, NULL, 1); cic_set_cfqq(cic, NULL, 1);
cfq_put_cooperator(cfqq);
cfq_put_queue(cfqq); cfq_put_queue(cfqq);
return NULL; return NULL;
} }
...@@ -3708,10 +3736,32 @@ static void cfq_exit_queue(struct elevator_queue *e) ...@@ -3708,10 +3736,32 @@ static void cfq_exit_queue(struct elevator_queue *e)
cfq_shutdown_timer_wq(cfqd); cfq_shutdown_timer_wq(cfqd);
spin_lock(&cic_index_lock);
ida_remove(&cic_index_ida, cfqd->cic_index);
spin_unlock(&cic_index_lock);
/* Wait for cfqg->blkg->key accessors to exit their grace periods. */ /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
call_rcu(&cfqd->rcu, cfq_cfqd_free); call_rcu(&cfqd->rcu, cfq_cfqd_free);
} }
static int cfq_alloc_cic_index(void)
{
int index, error;
do {
if (!ida_pre_get(&cic_index_ida, GFP_KERNEL))
return -ENOMEM;
spin_lock(&cic_index_lock);
error = ida_get_new(&cic_index_ida, &index);
spin_unlock(&cic_index_lock);
if (error && error != -EAGAIN)
return error;
} while (error);
return index;
}
static void *cfq_init_queue(struct request_queue *q) static void *cfq_init_queue(struct request_queue *q)
{ {
struct cfq_data *cfqd; struct cfq_data *cfqd;
...@@ -3719,10 +3769,16 @@ static void *cfq_init_queue(struct request_queue *q) ...@@ -3719,10 +3769,16 @@ static void *cfq_init_queue(struct request_queue *q)
struct cfq_group *cfqg; struct cfq_group *cfqg;
struct cfq_rb_root *st; struct cfq_rb_root *st;
i = cfq_alloc_cic_index();
if (i < 0)
return NULL;
cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
if (!cfqd) if (!cfqd)
return NULL; return NULL;
cfqd->cic_index = i;
/* Init root service tree */ /* Init root service tree */
cfqd->grp_service_tree = CFQ_RB_ROOT; cfqd->grp_service_tree = CFQ_RB_ROOT;
...@@ -3984,6 +4040,7 @@ static void __exit cfq_exit(void) ...@@ -3984,6 +4040,7 @@ static void __exit cfq_exit(void)
*/ */
if (elv_ioc_count_read(cfq_ioc_count)) if (elv_ioc_count_read(cfq_ioc_count))
wait_for_completion(&all_gone); wait_for_completion(&all_gone);
ida_destroy(&cic_index_ida);
cfq_slab_kill(); cfq_slab_kill();
} }
......
...@@ -242,9 +242,11 @@ int elevator_init(struct request_queue *q, char *name) ...@@ -242,9 +242,11 @@ int elevator_init(struct request_queue *q, char *name)
{ {
struct elevator_type *e = NULL; struct elevator_type *e = NULL;
struct elevator_queue *eq; struct elevator_queue *eq;
int ret = 0;
void *data; void *data;
if (unlikely(q->elevator))
return 0;
INIT_LIST_HEAD(&q->queue_head); INIT_LIST_HEAD(&q->queue_head);
q->last_merge = NULL; q->last_merge = NULL;
q->end_sector = 0; q->end_sector = 0;
...@@ -284,7 +286,7 @@ int elevator_init(struct request_queue *q, char *name) ...@@ -284,7 +286,7 @@ int elevator_init(struct request_queue *q, char *name)
} }
elevator_attach(q, eq, data); elevator_attach(q, eq, data);
return ret; return 0;
} }
EXPORT_SYMBOL(elevator_init); EXPORT_SYMBOL(elevator_init);
...@@ -1097,7 +1099,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name) ...@@ -1097,7 +1099,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
struct elevator_type *__e; struct elevator_type *__e;
int len = 0; int len = 0;
if (!q->elevator) if (!q->elevator || !blk_queue_stackable(q))
return sprintf(name, "none\n"); return sprintf(name, "none\n");
elv = e->elevator_type; elv = e->elevator_type;
......
...@@ -133,6 +133,28 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) ...@@ -133,6 +133,28 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
return page; return page;
} }
/*
 * Remove the page indexed by the given sector from brd->brd_pages and free
 * it. Does nothing beyond the tree lookup when no page is stored there.
 */
static void brd_free_page(struct brd_device *brd, sector_t sector)
{
	pgoff_t idx = sector >> PAGE_SECTORS_SHIFT;
	struct page *victim;

	/* The radix tree is modified under brd_lock. */
	spin_lock(&brd->brd_lock);
	victim = radix_tree_delete(&brd->brd_pages, idx);
	spin_unlock(&brd->brd_lock);

	if (victim)
		__free_page(victim);
}
/*
 * Clear the contents of the page that brd_lookup_page() returns for the
 * given sector; a sector with no page is left alone.
 */
static void brd_zero_page(struct brd_device *brd, sector_t sector)
{
	struct page *page = brd_lookup_page(brd, sector);

	if (!page)
		return;

	clear_highpage(page);
}
/* /*
* Free all backing store pages and radix tree. This must only be called when * Free all backing store pages and radix tree. This must only be called when
* there are no other users of the device. * there are no other users of the device.
...@@ -189,6 +211,24 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n) ...@@ -189,6 +211,24 @@ static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
return 0; return 0;
} }
/*
 * Process a discard of n bytes starting at sector, one whole page at a
 * time. Any tail shorter than PAGE_SIZE is left untouched.
 */
static void discard_from_brd(struct brd_device *brd,
			sector_t sector, size_t n)
{
	const sector_t sectors_per_page = PAGE_SIZE >> SECTOR_SHIFT;

	for (; n >= PAGE_SIZE; n -= PAGE_SIZE, sector += sectors_per_page) {
		/*
		 * Don't want to actually discard pages here because
		 * re-allocating the pages can result in writeback
		 * deadlocks under heavy load.
		 */
		if (0)
			brd_free_page(brd, sector);
		else
			brd_zero_page(brd, sector);
	}
}
/* /*
* Copy n bytes from src to the brd starting at sector. Does not sleep. * Copy n bytes from src to the brd starting at sector. Does not sleep.
*/ */
...@@ -300,6 +340,12 @@ static int brd_make_request(struct request_queue *q, struct bio *bio) ...@@ -300,6 +340,12 @@ static int brd_make_request(struct request_queue *q, struct bio *bio)
get_capacity(bdev->bd_disk)) get_capacity(bdev->bd_disk))
goto out; goto out;
if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
err = 0;
discard_from_brd(brd, sector, bio->bi_size);
goto out;
}
rw = bio_rw(bio); rw = bio_rw(bio);
if (rw == READA) if (rw == READA)
rw = READ; rw = READ;
...@@ -320,7 +366,7 @@ static int brd_make_request(struct request_queue *q, struct bio *bio) ...@@ -320,7 +366,7 @@ static int brd_make_request(struct request_queue *q, struct bio *bio)
} }
#ifdef CONFIG_BLK_DEV_XIP #ifdef CONFIG_BLK_DEV_XIP
static int brd_direct_access (struct block_device *bdev, sector_t sector, static int brd_direct_access(struct block_device *bdev, sector_t sector,
void **kaddr, unsigned long *pfn) void **kaddr, unsigned long *pfn)
{ {
struct brd_device *brd = bdev->bd_disk->private_data; struct brd_device *brd = bdev->bd_disk->private_data;
...@@ -437,6 +483,11 @@ static struct brd_device *brd_alloc(int i) ...@@ -437,6 +483,11 @@ static struct brd_device *brd_alloc(int i)
blk_queue_max_hw_sectors(brd->brd_queue, 1024); blk_queue_max_hw_sectors(brd->brd_queue, 1024);
blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
brd->brd_queue->limits.discard_granularity = PAGE_SIZE;
brd->brd_queue->limits.max_discard_sectors = UINT_MAX;
brd->brd_queue->limits.discard_zeroes_data = 1;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
disk = brd->brd_disk = alloc_disk(1 << part_shift); disk = brd->brd_disk = alloc_disk(1 << part_shift);
if (!disk) if (!disk)
goto out_free_queue; goto out_free_queue;
......
...@@ -188,11 +188,11 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd) ...@@ -188,11 +188,11 @@ scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd)
sa = h->scsi_ctlr; sa = h->scsi_ctlr;
stk = &sa->cmd_stack; stk = &sa->cmd_stack;
stk->top++;
if (stk->top >= CMD_STACK_SIZE) { if (stk->top >= CMD_STACK_SIZE) {
printk("cciss: scsi_cmd_free called too many times.\n"); printk("cciss: scsi_cmd_free called too many times.\n");
BUG(); BUG();
} }
stk->top++;
stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd; stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd;
} }
......
...@@ -943,8 +943,7 @@ struct drbd_conf { ...@@ -943,8 +943,7 @@ struct drbd_conf {
struct drbd_work resync_work, struct drbd_work resync_work,
unplug_work, unplug_work,
md_sync_work, md_sync_work,
delay_probe_work, delay_probe_work;
uuid_work;
struct timer_list resync_timer; struct timer_list resync_timer;
struct timer_list md_sync_timer; struct timer_list md_sync_timer;
struct timer_list delay_probe_timer; struct timer_list delay_probe_timer;
...@@ -1069,7 +1068,6 @@ struct drbd_conf { ...@@ -1069,7 +1068,6 @@ struct drbd_conf {
struct timeval dps_time; /* delay-probes-start-time */ struct timeval dps_time; /* delay-probes-start-time */
unsigned int dp_volume_last; /* send_cnt of last delay probe */ unsigned int dp_volume_last; /* send_cnt of last delay probe */
int c_sync_rate; /* current resync rate after delay_probe magic */ int c_sync_rate; /* current resync rate after delay_probe magic */
atomic_t new_c_uuid;
}; };
static inline struct drbd_conf *minor_to_mdev(unsigned int minor) static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
...@@ -1476,7 +1474,6 @@ extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); ...@@ -1476,7 +1474,6 @@ extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int);
extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int);
extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int);
extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int);
extern int w_io_error(struct drbd_conf *, struct drbd_work *, int);
extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int);
extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int);
extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int);
...@@ -1542,7 +1539,7 @@ static inline void drbd_tcp_nodelay(struct socket *sock) ...@@ -1542,7 +1539,7 @@ static inline void drbd_tcp_nodelay(struct socket *sock)
static inline void drbd_tcp_quickack(struct socket *sock) static inline void drbd_tcp_quickack(struct socket *sock)
{ {
int __user val = 1; int __user val = 2;
(void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
(char __user *)&val, sizeof(val)); (char __user *)&val, sizeof(val));
} }
...@@ -1728,7 +1725,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, ...@@ -1728,7 +1725,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
switch (mdev->ldev->dc.on_io_error) { switch (mdev->ldev->dc.on_io_error) {
case EP_PASS_ON: case EP_PASS_ON:
if (!forcedetach) { if (!forcedetach) {
if (printk_ratelimit()) if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Local IO failed in %s." dev_err(DEV, "Local IO failed in %s."
"Passing error on...\n", where); "Passing error on...\n", where);
break; break;
...@@ -2219,8 +2216,6 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) ...@@ -2219,8 +2216,6 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
return 0; return 0;
if (test_bit(BITMAP_IO, &mdev->flags)) if (test_bit(BITMAP_IO, &mdev->flags))
return 0; return 0;
if (atomic_read(&mdev->new_c_uuid))
return 0;
return 1; return 1;
} }
...@@ -2241,9 +2236,6 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count) ...@@ -2241,9 +2236,6 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
* to avoid races with the reconnect code, * to avoid races with the reconnect code,
* we need to atomic_inc within the spinlock. */ * we need to atomic_inc within the spinlock. */
if (atomic_read(&mdev->new_c_uuid) && atomic_add_unless(&mdev->new_c_uuid, -1, 1))
drbd_queue_work_front(&mdev->data.work, &mdev->uuid_work);
spin_lock_irq(&mdev->req_lock); spin_lock_irq(&mdev->req_lock);
while (!__inc_ap_bio_cond(mdev)) { while (!__inc_ap_bio_cond(mdev)) {
prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
......
...@@ -1215,18 +1215,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, ...@@ -1215,18 +1215,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
ns.pdsk == D_OUTDATED)) { ns.pdsk == D_OUTDATED)) {
if (get_ldev(mdev)) { if (get_ldev(mdev)) {
if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE && mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
!atomic_read(&mdev->new_c_uuid)) drbd_uuid_new_current(mdev);
atomic_set(&mdev->new_c_uuid, 2); drbd_send_uuids(mdev);
}
put_ldev(mdev); put_ldev(mdev);
} }
} }
if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
/* Diskless peer becomes primary or got connected do diskless, primary peer. */ if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 && drbd_uuid_new_current(mdev);
!atomic_read(&mdev->new_c_uuid))
atomic_set(&mdev->new_c_uuid, 2);
/* D_DISKLESS Peer becomes secondary */ /* D_DISKLESS Peer becomes secondary */
if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
...@@ -1350,24 +1349,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, ...@@ -1350,24 +1349,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
drbd_md_sync(mdev); drbd_md_sync(mdev);
} }
static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
if (get_ldev(mdev)) {
if (mdev->ldev->md.uuid[UI_BITMAP] == 0) {
drbd_uuid_new_current(mdev);
if (get_net_conf(mdev)) {
drbd_send_uuids(mdev);
put_net_conf(mdev);
}
drbd_md_sync(mdev);
}
put_ldev(mdev);
}
atomic_dec(&mdev->new_c_uuid);
wake_up(&mdev->misc_wait);
return 1;
}
static int drbd_thread_setup(void *arg) static int drbd_thread_setup(void *arg)
{ {
...@@ -2291,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * ...@@ -2291,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *
* with page_count == 0 or PageSlab. * with page_count == 0 or PageSlab.
*/ */
static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
int offset, size_t size) int offset, size_t size, unsigned msg_flags)
{ {
int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
kunmap(page); kunmap(page);
if (sent == size) if (sent == size)
mdev->send_cnt += size>>9; mdev->send_cnt += size>>9;
...@@ -2301,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, ...@@ -2301,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
} }
static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
int offset, size_t size) int offset, size_t size, unsigned msg_flags)
{ {
mm_segment_t oldfs = get_fs(); mm_segment_t oldfs = get_fs();
int sent, ok; int sent, ok;
...@@ -2314,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, ...@@ -2314,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
* __page_cache_release a page that would actually still be referenced * __page_cache_release a page that would actually still be referenced
* by someone, leading to some obscure delayed Oops somewhere else. */ * by someone, leading to some obscure delayed Oops somewhere else. */
if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
return _drbd_no_send_page(mdev, page, offset, size); return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
msg_flags |= MSG_NOSIGNAL;
drbd_update_congested(mdev); drbd_update_congested(mdev);
set_fs(KERNEL_DS); set_fs(KERNEL_DS);
do { do {
sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
offset, len, offset, len,
MSG_NOSIGNAL); msg_flags);
if (sent == -EAGAIN) { if (sent == -EAGAIN) {
if (we_should_drop_the_connection(mdev, if (we_should_drop_the_connection(mdev,
mdev->data.socket)) mdev->data.socket))
...@@ -2350,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) ...@@ -2350,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
{ {
struct bio_vec *bvec; struct bio_vec *bvec;
int i; int i;
/* hint all but last page with MSG_MORE */
__bio_for_each_segment(bvec, bio, i, 0) { __bio_for_each_segment(bvec, bio, i, 0) {
if (!_drbd_no_send_page(mdev, bvec->bv_page, if (!_drbd_no_send_page(mdev, bvec->bv_page,
bvec->bv_offset, bvec->bv_len)) bvec->bv_offset, bvec->bv_len,
i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
return 0; return 0;
} }
return 1; return 1;
...@@ -2362,12 +2346,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) ...@@ -2362,12 +2346,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
{ {
struct bio_vec *bvec; struct bio_vec *bvec;
int i; int i;
/* hint all but last page with MSG_MORE */
__bio_for_each_segment(bvec, bio, i, 0) { __bio_for_each_segment(bvec, bio, i, 0) {
if (!_drbd_send_page(mdev, bvec->bv_page, if (!_drbd_send_page(mdev, bvec->bv_page,
bvec->bv_offset, bvec->bv_len)) bvec->bv_offset, bvec->bv_len,
i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
return 0; return 0;
} }
return 1; return 1;
} }
...@@ -2375,9 +2360,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) ...@@ -2375,9 +2360,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
{ {
struct page *page = e->pages; struct page *page = e->pages;
unsigned len = e->size; unsigned len = e->size;
/* hint all but last page with MSG_MORE */
page_chain_for_each(page) { page_chain_for_each(page) {
unsigned l = min_t(unsigned, len, PAGE_SIZE); unsigned l = min_t(unsigned, len, PAGE_SIZE);
if (!_drbd_send_page(mdev, page, 0, l)) if (!_drbd_send_page(mdev, page, 0, l,
page_chain_next(page) ? MSG_MORE : 0))
return 0; return 0;
len -= l; len -= l;
} }
...@@ -2457,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) ...@@ -2457,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
p.dp_flags = cpu_to_be32(dp_flags); p.dp_flags = cpu_to_be32(dp_flags);
set_bit(UNPLUG_REMOTE, &mdev->flags); set_bit(UNPLUG_REMOTE, &mdev->flags);
ok = (sizeof(p) == ok = (sizeof(p) ==
drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
if (ok && dgs) { if (ok && dgs) {
dgb = mdev->int_dig_out; dgb = mdev->int_dig_out;
drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
} }
if (ok) { if (ok) {
if (mdev->net_conf->wire_protocol == DRBD_PROT_A) if (mdev->net_conf->wire_protocol == DRBD_PROT_A)
...@@ -2510,11 +2497,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, ...@@ -2510,11 +2497,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
return 0; return 0;
ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p,
sizeof(p), MSG_MORE); sizeof(p), dgs ? MSG_MORE : 0);
if (ok && dgs) { if (ok && dgs) {
dgb = mdev->int_dig_out; dgb = mdev->int_dig_out;
drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
} }
if (ok) if (ok)
ok = _drbd_send_zc_ee(mdev, e); ok = _drbd_send_zc_ee(mdev, e);
...@@ -2708,7 +2695,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) ...@@ -2708,7 +2695,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
atomic_set(&mdev->net_cnt, 0); atomic_set(&mdev->net_cnt, 0);
atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->packet_seq, 0);
atomic_set(&mdev->pp_in_use, 0); atomic_set(&mdev->pp_in_use, 0);
atomic_set(&mdev->new_c_uuid, 0);
mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->md_io_mutex);
mutex_init(&mdev->data.mutex); mutex_init(&mdev->data.mutex);
...@@ -2739,14 +2725,12 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) ...@@ -2739,14 +2725,12 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
INIT_LIST_HEAD(&mdev->bm_io_work.w.list); INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
INIT_LIST_HEAD(&mdev->delay_probes); INIT_LIST_HEAD(&mdev->delay_probes);
INIT_LIST_HEAD(&mdev->delay_probe_work.list); INIT_LIST_HEAD(&mdev->delay_probe_work.list);
INIT_LIST_HEAD(&mdev->uuid_work.list);
mdev->resync_work.cb = w_resync_inactive; mdev->resync_work.cb = w_resync_inactive;
mdev->unplug_work.cb = w_send_write_hint; mdev->unplug_work.cb = w_send_write_hint;
mdev->md_sync_work.cb = w_md_sync; mdev->md_sync_work.cb = w_md_sync;
mdev->bm_io_work.w.cb = w_bitmap_io; mdev->bm_io_work.w.cb = w_bitmap_io;
mdev->delay_probe_work.cb = w_delay_probes; mdev->delay_probe_work.cb = w_delay_probes;
mdev->uuid_work.cb = w_new_current_uuid;
init_timer(&mdev->resync_timer); init_timer(&mdev->resync_timer);
init_timer(&mdev->md_sync_timer); init_timer(&mdev->md_sync_timer);
init_timer(&mdev->delay_probe_timer); init_timer(&mdev->delay_probe_timer);
...@@ -3799,7 +3783,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) ...@@ -3799,7 +3783,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
if (ret) { if (ret) {
fault_count++; fault_count++;
if (printk_ratelimit()) if (__ratelimit(&drbd_ratelimit_state))
dev_warn(DEV, "***Simulating %s failure\n", dev_warn(DEV, "***Simulating %s failure\n",
_drbd_fault_str(type)); _drbd_fault_str(type));
} }
......
...@@ -42,7 +42,6 @@ ...@@ -42,7 +42,6 @@
#include <linux/unistd.h> #include <linux/unistd.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/mm.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/scatterlist.h> #include <linux/scatterlist.h>
#include "drbd_int.h" #include "drbd_int.h"
...@@ -571,6 +570,25 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) ...@@ -571,6 +570,25 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size)
return rv; return rv;
} }
/*
 * drbd_setbufsize() - apply send/receive buffer sizes to a socket
 * @sock: socket whose underlying sock gets the new buffer sizes
 * @snd:  send buffer size; 0 leaves the send buffer untouched
 * @rcv:  receive buffer size; 0 leaves the receive buffer untouched
 *
 * Quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to
 *   the listen(2) or connect(2) calls in order to have it take effect.
 * This wrapper exists so the sizes can be applied at that point.
 *
 * This is an open coded SO_SNDBUF / SO_RCVBUF: besides storing the size,
 * the matching SOCK_*BUF_LOCK bit is set in sk_userlocks.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	if (snd != 0) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}

	if (rcv != 0) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
static struct socket *drbd_try_connect(struct drbd_conf *mdev) static struct socket *drbd_try_connect(struct drbd_conf *mdev)
{ {
const char *what; const char *what;
...@@ -592,6 +610,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) ...@@ -592,6 +610,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev)
sock->sk->sk_rcvtimeo = sock->sk->sk_rcvtimeo =
sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ;
drbd_setbufsize(sock, mdev->net_conf->sndbuf_size,
mdev->net_conf->rcvbuf_size);
/* explicitly bind to the configured IP as source IP /* explicitly bind to the configured IP as source IP
* for the outgoing connections. * for the outgoing connections.
...@@ -670,6 +690,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) ...@@ -670,6 +690,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev)
s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
s_listen->sk->sk_rcvtimeo = timeo; s_listen->sk->sk_rcvtimeo = timeo;
s_listen->sk->sk_sndtimeo = timeo; s_listen->sk->sk_sndtimeo = timeo;
drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size,
mdev->net_conf->rcvbuf_size);
what = "bind before listen"; what = "bind before listen";
err = s_listen->ops->bind(s_listen, err = s_listen->ops->bind(s_listen,
...@@ -856,16 +878,6 @@ static int drbd_connect(struct drbd_conf *mdev) ...@@ -856,16 +878,6 @@ static int drbd_connect(struct drbd_conf *mdev)
sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
msock->sk->sk_priority = TC_PRIO_INTERACTIVE; msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
if (mdev->net_conf->sndbuf_size) {
sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size;
sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
}
if (mdev->net_conf->rcvbuf_size) {
sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size;
sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
/* NOT YET ... /* NOT YET ...
* sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
* sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
...@@ -1154,17 +1166,6 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, ...@@ -1154,17 +1166,6 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
unsigned n_bios = 0; unsigned n_bios = 0;
unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
if (atomic_read(&mdev->new_c_uuid)) {
if (atomic_add_unless(&mdev->new_c_uuid, -1, 1)) {
drbd_uuid_new_current(mdev);
drbd_md_sync(mdev);
atomic_dec(&mdev->new_c_uuid);
wake_up(&mdev->misc_wait);
}
wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid));
}
/* In most cases, we will only need one bio. But in case the lower /* In most cases, we will only need one bio. But in case the lower
* level restrictions happen to be different at this offset on this * level restrictions happen to be different at this offset on this
* side than those of the sending peer, we may need to submit the * side than those of the sending peer, we may need to submit the
......
...@@ -102,32 +102,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const ...@@ -102,32 +102,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
} }
} }
/* if it was a local io error, we want to notify our
* peer about that, and see if we need to
* detach the disk and stuff.
* to avoid allocating some special work
* struct, reuse the request. */
/* THINK
* why do we do this not when we detect the error,
* but delay it until it is "done", i.e. possibly
* until the next barrier ack? */
if (rw == WRITE &&
((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) {
if (!(req->w.list.next == LIST_POISON1 ||
list_empty(&req->w.list))) {
/* DEBUG ASSERT only; if this triggers, we
* probably corrupt the worker list here */
dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next);
dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev);
}
req->w.cb = w_io_error;
drbd_queue_work(&mdev->data.work, &req->w);
/* drbd_req_free() is done in w_io_error */
} else {
drbd_req_free(req); drbd_req_free(req);
}
} }
static void queue_barrier(struct drbd_conf *mdev) static void queue_barrier(struct drbd_conf *mdev)
...@@ -453,9 +428,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, ...@@ -453,9 +428,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING; req->rq_state &= ~RQ_LOCAL_PENDING;
dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n",
(unsigned long long)req->sector, req->size);
/* and now: check how to handle local io error. */
__drbd_chk_io_error(mdev, FALSE); __drbd_chk_io_error(mdev, FALSE);
_req_may_be_done(req, m); _req_may_be_done(req, m);
put_ldev(mdev); put_ldev(mdev);
...@@ -475,22 +447,21 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, ...@@ -475,22 +447,21 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING; req->rq_state &= ~RQ_LOCAL_PENDING;
dev_alert(DEV, "Local READ failed sec=%llus size=%u\n",
(unsigned long long)req->sector, req->size);
/* _req_mod(req,to_be_send); oops, recursion... */
D_ASSERT(!(req->rq_state & RQ_NET_MASK)); D_ASSERT(!(req->rq_state & RQ_NET_MASK));
req->rq_state |= RQ_NET_PENDING;
inc_ap_pending(mdev);
__drbd_chk_io_error(mdev, FALSE); __drbd_chk_io_error(mdev, FALSE);
put_ldev(mdev); put_ldev(mdev);
/* NOTE: if we have no connection,
* or know the peer has no good data either,
* then we don't actually need to "queue_for_net_read",
* but we do so anyways, since the drbd_io_error()
* and the potential state change to "Diskless"
* needs to be done from process context */
/* no point in retrying if there is no good remote data,
* or we have no connection. */
if (mdev->state.pdsk != D_UP_TO_DATE) {
_req_may_be_done(req, m);
break;
}
/* _req_mod(req,to_be_send); oops, recursion... */
req->rq_state |= RQ_NET_PENDING;
inc_ap_pending(mdev);
/* fall through: _req_mod(req,queue_for_net_read); */ /* fall through: _req_mod(req,queue_for_net_read); */
case queue_for_net_read: case queue_for_net_read:
...@@ -600,6 +571,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, ...@@ -600,6 +571,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
_req_may_be_done(req, m); _req_may_be_done(req, m);
break; break;
case read_retry_remote_canceled:
req->rq_state &= ~RQ_NET_QUEUED;
/* fall through, in case we raced with drbd_disconnect */
case connection_lost_while_pending: case connection_lost_while_pending:
/* transfer log cleanup after connection loss */ /* transfer log cleanup after connection loss */
/* assert something? */ /* assert something? */
......
...@@ -91,6 +91,7 @@ enum drbd_req_event { ...@@ -91,6 +91,7 @@ enum drbd_req_event {
send_failed, send_failed,
handed_over_to_network, handed_over_to_network,
connection_lost_while_pending, connection_lost_while_pending,
read_retry_remote_canceled,
recv_acked_by_peer, recv_acked_by_peer,
write_acked_by_peer, write_acked_by_peer,
write_acked_by_peer_and_sis, /* and set_in_sync */ write_acked_by_peer_and_sis, /* and set_in_sync */
......
...@@ -224,9 +224,6 @@ void drbd_endio_pri(struct bio *bio, int error) ...@@ -224,9 +224,6 @@ void drbd_endio_pri(struct bio *bio, int error)
enum drbd_req_event what; enum drbd_req_event what;
int uptodate = bio_flagged(bio, BIO_UPTODATE); int uptodate = bio_flagged(bio, BIO_UPTODATE);
if (error)
dev_warn(DEV, "p %s: error=%d\n",
bio_data_dir(bio) == WRITE ? "write" : "read", error);
if (!error && !uptodate) { if (!error && !uptodate) {
dev_warn(DEV, "p %s: setting error to -EIO\n", dev_warn(DEV, "p %s: setting error to -EIO\n",
bio_data_dir(bio) == WRITE ? "write" : "read"); bio_data_dir(bio) == WRITE ? "write" : "read");
...@@ -257,20 +254,6 @@ void drbd_endio_pri(struct bio *bio, int error) ...@@ -257,20 +254,6 @@ void drbd_endio_pri(struct bio *bio, int error)
complete_master_bio(mdev, &m); complete_master_bio(mdev, &m);
} }
/*
 * w_io_error() - worker callback that frees a finished request
 * @mdev:   device the work item belongs to (not used here)
 * @w:      work item embedded in a struct drbd_request
 * @cancel: not used here
 *
 * NOTE: mdev->ldev can be NULL by the time we get here, so nothing in this
 * callback may dereference it (which is why there is no assertion on
 * mdev->ldev->dc.on_io_error != EP_PASS_ON).
 *
 * The only way this callback is scheduled is from _req_may_be_done, when
 * the request is done and had a local write error; see comments there.
 *
 * Always returns TRUE.
 */
int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{
	/* recover the request that embeds this work item and release it */
	drbd_req_free(container_of(w, struct drbd_request, w));

	return TRUE;
}
int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
{ {
struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_request *req = container_of(w, struct drbd_request, w);
...@@ -280,12 +263,9 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) ...@@ -280,12 +263,9 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
* to give the disk the chance to relocate that block */ * to give the disk the chance to relocate that block */
spin_lock_irq(&mdev->req_lock); spin_lock_irq(&mdev->req_lock);
if (cancel || if (cancel || mdev->state.pdsk != D_UP_TO_DATE) {
mdev->state.conn < C_CONNECTED || _req_mod(req, read_retry_remote_canceled);
mdev->state.pdsk <= D_INCONSISTENT) {
_req_mod(req, send_canceled);
spin_unlock_irq(&mdev->req_lock); spin_unlock_irq(&mdev->req_lock);
dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n");
return 1; return 1;
} }
spin_unlock_irq(&mdev->req_lock); spin_unlock_irq(&mdev->req_lock);
......
...@@ -45,7 +45,6 @@ struct wb_writeback_args { ...@@ -45,7 +45,6 @@ struct wb_writeback_args {
unsigned int for_kupdate:1; unsigned int for_kupdate:1;
unsigned int range_cyclic:1; unsigned int range_cyclic:1;
unsigned int for_background:1; unsigned int for_background:1;
unsigned int sb_pinned:1;
}; };
/* /*
...@@ -193,8 +192,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work) ...@@ -193,8 +192,7 @@ static void bdi_wait_on_work_clear(struct bdi_work *work)
} }
static void bdi_alloc_queue_work(struct backing_dev_info *bdi, static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
struct wb_writeback_args *args, struct wb_writeback_args *args)
int wait)
{ {
struct bdi_work *work; struct bdi_work *work;
...@@ -206,8 +204,6 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi, ...@@ -206,8 +204,6 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
if (work) { if (work) {
bdi_work_init(work, args); bdi_work_init(work, args);
bdi_queue_work(bdi, work); bdi_queue_work(bdi, work);
if (wait)
bdi_wait_on_work_clear(work);
} else { } else {
struct bdi_writeback *wb = &bdi->wb; struct bdi_writeback *wb = &bdi->wb;
...@@ -234,11 +230,6 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, ...@@ -234,11 +230,6 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
.sync_mode = WB_SYNC_ALL, .sync_mode = WB_SYNC_ALL,
.nr_pages = LONG_MAX, .nr_pages = LONG_MAX,
.range_cyclic = 0, .range_cyclic = 0,
/*
* Setting sb_pinned is not necessary for WB_SYNC_ALL, but
* lets make it explicitly clear.
*/
.sb_pinned = 1,
}; };
struct bdi_work work; struct bdi_work work;
...@@ -254,23 +245,21 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, ...@@ -254,23 +245,21 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
* @bdi: the backing device to write from * @bdi: the backing device to write from
* @sb: write inodes from this super_block * @sb: write inodes from this super_block
* @nr_pages: the number of pages to write * @nr_pages: the number of pages to write
* @sb_locked: caller already holds sb umount sem.
* *
* Description: * Description:
* This does WB_SYNC_NONE opportunistic writeback. The IO is only * This does WB_SYNC_NONE opportunistic writeback. The IO is only
* started when this function returns, we make no guarantees on * started when this function returns, we make no guarantees on
* completion. Caller specifies whether sb umount sem is held already or not. * completion. Caller need not hold sb s_umount semaphore.
* *
*/ */
void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
long nr_pages, int sb_locked) long nr_pages)
{ {
struct wb_writeback_args args = { struct wb_writeback_args args = {
.sb = sb, .sb = sb,
.sync_mode = WB_SYNC_NONE, .sync_mode = WB_SYNC_NONE,
.nr_pages = nr_pages, .nr_pages = nr_pages,
.range_cyclic = 1, .range_cyclic = 1,
.sb_pinned = sb_locked,
}; };
/* /*
...@@ -282,7 +271,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, ...@@ -282,7 +271,7 @@ void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
args.for_background = 1; args.for_background = 1;
} }
bdi_alloc_queue_work(bdi, &args, sb_locked); bdi_alloc_queue_work(bdi, &args);
} }
/* /*
...@@ -595,7 +584,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, ...@@ -595,7 +584,7 @@ static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
/* /*
* Caller must already hold the ref for this * Caller must already hold the ref for this
*/ */
if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) { if (wbc->sync_mode == WB_SYNC_ALL) {
WARN_ON(!rwsem_is_locked(&sb->s_umount)); WARN_ON(!rwsem_is_locked(&sb->s_umount));
return SB_NOT_PINNED; return SB_NOT_PINNED;
} }
...@@ -769,7 +758,6 @@ static long wb_writeback(struct bdi_writeback *wb, ...@@ -769,7 +758,6 @@ static long wb_writeback(struct bdi_writeback *wb,
.for_kupdate = args->for_kupdate, .for_kupdate = args->for_kupdate,
.for_background = args->for_background, .for_background = args->for_background,
.range_cyclic = args->range_cyclic, .range_cyclic = args->range_cyclic,
.sb_pinned = args->sb_pinned,
}; };
unsigned long oldest_jif; unsigned long oldest_jif;
long wrote = 0; long wrote = 0;
...@@ -912,7 +900,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) ...@@ -912,7 +900,6 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
while ((work = get_next_work_item(bdi, wb)) != NULL) { while ((work = get_next_work_item(bdi, wb)) != NULL) {
struct wb_writeback_args args = work->args; struct wb_writeback_args args = work->args;
int post_clear;
/* /*
* Override sync mode, in case we must wait for completion * Override sync mode, in case we must wait for completion
...@@ -920,13 +907,11 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) ...@@ -920,13 +907,11 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
if (force_wait) if (force_wait)
work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
post_clear = WB_SYNC_ALL || args.sb_pinned;
/* /*
* If this isn't a data integrity operation, just notify * If this isn't a data integrity operation, just notify
* that we have seen this work and we are now starting it. * that we have seen this work and we are now starting it.
*/ */
if (!post_clear) if (args.sync_mode == WB_SYNC_NONE)
wb_clear_pending(wb, work); wb_clear_pending(wb, work);
wrote += wb_writeback(wb, &args); wrote += wb_writeback(wb, &args);
...@@ -935,7 +920,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) ...@@ -935,7 +920,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
* This is a data integrity writeback, so only do the * This is a data integrity writeback, so only do the
* notification when we have completed the work. * notification when we have completed the work.
*/ */
if (post_clear) if (args.sync_mode == WB_SYNC_ALL)
wb_clear_pending(wb, work); wb_clear_pending(wb, work);
} }
...@@ -1011,7 +996,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages) ...@@ -1011,7 +996,7 @@ static void bdi_writeback_all(struct super_block *sb, long nr_pages)
if (!bdi_has_dirty_io(bdi)) if (!bdi_has_dirty_io(bdi))
continue; continue;
bdi_alloc_queue_work(bdi, &args, 0); bdi_alloc_queue_work(bdi, &args);
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -1220,18 +1205,6 @@ static void wait_sb_inodes(struct super_block *sb) ...@@ -1220,18 +1205,6 @@ static void wait_sb_inodes(struct super_block *sb)
iput(old_inode); iput(old_inode);
} }
/*
 * __writeback_inodes_sb() - start writeback for a super_block
 * @sb:        super_block whose backing device is asked to write back
 * @sb_locked: passed through unchanged to bdi_start_writeback()
 *
 * The amount to write is the sum of the global dirty and unstable-NFS page
 * counts plus the number of inodes currently in use (nr_inodes - nr_unused).
 */
static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
{
	unsigned long dirty_pages = global_page_state(NR_FILE_DIRTY);
	unsigned long unstable_pages = global_page_state(NR_UNSTABLE_NFS);
	long nr_to_write = dirty_pages + unstable_pages +
			(inodes_stat.nr_inodes - inodes_stat.nr_unused);

	bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
}
/** /**
* writeback_inodes_sb - writeback dirty inodes from given super_block * writeback_inodes_sb - writeback dirty inodes from given super_block
* @sb: the superblock * @sb: the superblock
...@@ -1243,21 +1216,16 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked) ...@@ -1243,21 +1216,16 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
*/ */
void writeback_inodes_sb(struct super_block *sb) void writeback_inodes_sb(struct super_block *sb)
{ {
__writeback_inodes_sb(sb, 0); unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
} unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
EXPORT_SYMBOL(writeback_inodes_sb); long nr_to_write;
/** nr_to_write = nr_dirty + nr_unstable +
* writeback_inodes_sb_locked - writeback dirty inodes from given super_block (inodes_stat.nr_inodes - inodes_stat.nr_unused);
* @sb: the superblock
* bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
* Like writeback_inodes_sb(), except the caller already holds the
* sb umount sem.
*/
void writeback_inodes_sb_locked(struct super_block *sb)
{
__writeback_inodes_sb(sb, 1);
} }
EXPORT_SYMBOL(writeback_inodes_sb);
/** /**
* writeback_inodes_sb_if_idle - start writeback if none underway * writeback_inodes_sb_if_idle - start writeback if none underway
......
...@@ -26,9 +26,14 @@ ...@@ -26,9 +26,14 @@
/* /*
* The max size that a non-root user is allowed to grow the pipe. Can * The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-pages * be set by root in /proc/sys/fs/pipe-max-size
*/ */
unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16; unsigned int pipe_max_size = 1048576;
/*
* Minimum pipe size, as required by POSIX
*/
unsigned int pipe_min_size = PAGE_SIZE;
/* /*
* We use a start+len construction, which provides full use of the * We use a start+len construction, which provides full use of the
...@@ -1118,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) ...@@ -1118,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
* Allocate a new array of pipe buffers and copy the info over. Returns the * Allocate a new array of pipe buffers and copy the info over. Returns the
* pipe size if successful, or return -ERROR on error. * pipe size if successful, or return -ERROR on error.
*/ */
static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
{ {
struct pipe_buffer *bufs; struct pipe_buffer *bufs;
/*
* Must be a power-of-2 currently
*/
if (!is_power_of_2(arg))
return -EINVAL;
/* /*
* We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
* expect a lot of shrink+grow operations, just free and allocate * expect a lot of shrink+grow operations, just free and allocate
* again like we would do for growing. If the pipe currently * again like we would do for growing. If the pipe currently
* contains more buffers than arg, then return busy. * contains more buffers than arg, then return busy.
*/ */
if (arg < pipe->nrbufs) if (nr_pages < pipe->nrbufs)
return -EBUSY; return -EBUSY;
bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL); bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
if (unlikely(!bufs)) if (unlikely(!bufs))
return -ENOMEM; return -ENOMEM;
...@@ -1158,8 +1157,37 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) ...@@ -1158,8 +1157,37 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
pipe->curbuf = 0; pipe->curbuf = 0;
kfree(pipe->bufs); kfree(pipe->bufs);
pipe->bufs = bufs; pipe->bufs = bufs;
pipe->buffers = arg; pipe->buffers = nr_pages;
return arg; return nr_pages * PAGE_SIZE;
}
/*
 * Round a requested pipe size, given in bytes, up to a power-of-2 number
 * of pages and return that size in bytes.
 *
 * Currently we rely on the pipe array holding a power-of-2 number
 * of pages.
 *
 * A request that rounds to zero pages (size == 0) is treated as a request
 * for a single page, because roundup_pow_of_two() must never be handed 0.
 * The page count is derived by shifting and masking instead of adding
 * PAGE_SIZE - 1 to size first, so the rounding step itself cannot wrap for
 * sizes near the top of the range.
 *
 * NOTE(review): for very large requests the final byte count may still not
 * fit the unsigned int return type; callers are expected to bound size.
 */
static inline unsigned int round_pipe_size(unsigned int size)
{
	unsigned long nr_pages;

	/* whole pages, plus one more for a partial trailing page */
	nr_pages = size >> PAGE_SHIFT;
	if (size & (PAGE_SIZE - 1))
		nr_pages++;

	/* the result of roundup_pow_of_two(0) is undefined */
	if (!nr_pages)
		nr_pages = 1;

	return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
}
/*
* This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
* will return an error.
*/
int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
size_t *lenp, loff_t *ppos)
{
int ret;
ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
if (ret < 0 || !write)
return ret;
pipe_max_size = round_pipe_size(pipe_max_size);
return ret;
} }
long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
...@@ -1174,23 +1202,24 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) ...@@ -1174,23 +1202,24 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
mutex_lock(&pipe->inode->i_mutex); mutex_lock(&pipe->inode->i_mutex);
switch (cmd) { switch (cmd) {
case F_SETPIPE_SZ: case F_SETPIPE_SZ: {
if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages) { unsigned int size, nr_pages;
ret = -EINVAL;
size = round_pipe_size(arg);
nr_pages = size >> PAGE_SHIFT;
if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
ret = -EPERM;
goto out; goto out;
} } else if (nr_pages < PAGE_SIZE) {
/*
* The pipe needs to be at least 2 pages large to
* guarantee POSIX behaviour.
*/
if (arg < 2) {
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
ret = pipe_set_size(pipe, arg); ret = pipe_set_size(pipe, nr_pages);
break; break;
}
case F_GETPIPE_SZ: case F_GETPIPE_SZ:
ret = pipe->buffers; ret = pipe->buffers * PAGE_SIZE;
break; break;
default: default:
ret = -EINVAL; ret = -EINVAL;
......
...@@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, ...@@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
break; break;
error = add_to_page_cache_lru(page, mapping, index, error = add_to_page_cache_lru(page, mapping, index,
mapping_gfp_mask(mapping)); GFP_KERNEL);
if (unlikely(error)) { if (unlikely(error)) {
page_cache_release(page); page_cache_release(page);
if (error == -EEXIST) if (error == -EEXIST)
......
...@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) ...@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
if (wait) if (wait)
sync_inodes_sb(sb); sync_inodes_sb(sb);
else else
writeback_inodes_sb_locked(sb); writeback_inodes_sb(sb);
if (sb->s_op->sync_fs) if (sb->s_op->sync_fs)
sb->s_op->sync_fs(sb, wait); sb->s_op->sync_fs(sb, wait);
......
...@@ -106,7 +106,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); ...@@ -106,7 +106,7 @@ int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev);
void bdi_unregister(struct backing_dev_info *bdi); void bdi_unregister(struct backing_dev_info *bdi);
int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
long nr_pages, int sb_locked); long nr_pages);
int bdi_writeback_task(struct bdi_writeback *wb); int bdi_writeback_task(struct bdi_writeback *wb);
int bdi_has_dirty_io(struct backing_dev_info *bdi); int bdi_has_dirty_io(struct backing_dev_info *bdi);
void bdi_arm_supers_timer(void); void bdi_arm_supers_timer(void);
......
...@@ -1211,14 +1211,23 @@ struct work_struct; ...@@ -1211,14 +1211,23 @@ struct work_struct;
int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
#ifdef CONFIG_BLK_CGROUP #ifdef CONFIG_BLK_CGROUP
/*
* This should not be using sched_clock(). A real patch is in progress
* to fix this up, until that is in place we need to disable preemption
* around sched_clock() in this function and set_io_start_time_ns().
*/
static inline void set_start_time_ns(struct request *req) static inline void set_start_time_ns(struct request *req)
{ {
preempt_disable();
req->start_time_ns = sched_clock(); req->start_time_ns = sched_clock();
preempt_enable();
} }
static inline void set_io_start_time_ns(struct request *req) static inline void set_io_start_time_ns(struct request *req)
{ {
preempt_disable();
req->io_start_time_ns = sched_clock(); req->io_start_time_ns = sched_clock();
preempt_enable();
} }
static inline uint64_t rq_start_time_ns(struct request *req) static inline uint64_t rq_start_time_ns(struct request *req)
......
...@@ -53,7 +53,7 @@ ...@@ -53,7 +53,7 @@
extern const char *drbd_buildtag(void); extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.8rc1" #define REL_VERSION "8.3.8rc2"
#define API_VERSION 88 #define API_VERSION 88
#define PRO_VERSION_MIN 86 #define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 94 #define PRO_VERSION_MAX 94
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
struct cfq_queue; struct cfq_queue;
struct cfq_io_context { struct cfq_io_context {
void *key; void *key;
unsigned long dead_key;
struct cfq_queue *cfqq[2]; struct cfq_queue *cfqq[2];
......
...@@ -139,7 +139,9 @@ void pipe_lock(struct pipe_inode_info *); ...@@ -139,7 +139,9 @@ void pipe_lock(struct pipe_inode_info *);
void pipe_unlock(struct pipe_inode_info *); void pipe_unlock(struct pipe_inode_info *);
void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
extern unsigned int pipe_max_pages; extern unsigned int pipe_max_size, pipe_min_size;
int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
/* Drop the inode semaphore and wait for a pipe event, atomically */ /* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe); void pipe_wait(struct pipe_inode_info *pipe);
......
...@@ -65,15 +65,6 @@ struct writeback_control { ...@@ -65,15 +65,6 @@ struct writeback_control {
* so we use a single control to update them * so we use a single control to update them
*/ */
unsigned no_nrwrite_index_update:1; unsigned no_nrwrite_index_update:1;
/*
* For WB_SYNC_ALL, the sb must always be pinned. For WB_SYNC_NONE,
* the writeback code will pin the sb for the caller. However,
* for eg umount, the caller does WB_SYNC_NONE but already has
* the sb pinned. If the below is set, caller already has the
* sb pinned.
*/
unsigned sb_pinned:1;
}; };
/* /*
...@@ -82,7 +73,6 @@ struct writeback_control { ...@@ -82,7 +73,6 @@ struct writeback_control {
struct bdi_writeback; struct bdi_writeback;
int inode_wait(void *); int inode_wait(void *);
void writeback_inodes_sb(struct super_block *); void writeback_inodes_sb(struct super_block *);
void writeback_inodes_sb_locked(struct super_block *);
int writeback_inodes_sb_if_idle(struct super_block *); int writeback_inodes_sb_if_idle(struct super_block *);
void sync_inodes_sb(struct super_block *); void sync_inodes_sb(struct super_block *);
void writeback_inodes_wbc(struct writeback_control *wbc); void writeback_inodes_wbc(struct writeback_control *wbc);
......
...@@ -1471,12 +1471,12 @@ static struct ctl_table fs_table[] = { ...@@ -1471,12 +1471,12 @@ static struct ctl_table fs_table[] = {
}, },
#endif #endif
{ {
.procname = "pipe-max-pages", .procname = "pipe-max-size",
.data = &pipe_max_pages, .data = &pipe_max_size,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec_minmax, .proc_handler = &pipe_proc_fn,
.extra1 = &two, .extra1 = &pipe_min_size,
}, },
/* /*
* NOTE: do not add new entries to this table unless you have read * NOTE: do not add new entries to this table unless you have read
......
...@@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping, ...@@ -597,7 +597,7 @@ static void balance_dirty_pages(struct address_space *mapping,
(!laptop_mode && ((global_page_state(NR_FILE_DIRTY) (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
+ global_page_state(NR_UNSTABLE_NFS)) + global_page_state(NR_UNSTABLE_NFS))
> background_thresh))) > background_thresh)))
bdi_start_writeback(bdi, NULL, 0, 0); bdi_start_writeback(bdi, NULL, 0);
} }
void set_page_dirty_balance(struct page *page, int page_mkwrite) void set_page_dirty_balance(struct page *page, int page_mkwrite)
...@@ -707,7 +707,7 @@ void laptop_mode_timer_fn(unsigned long data) ...@@ -707,7 +707,7 @@ void laptop_mode_timer_fn(unsigned long data)
*/ */
if (bdi_has_dirty_io(&q->backing_dev_info)) if (bdi_has_dirty_io(&q->backing_dev_info))
bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages, 0); bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages);
} }
/* /*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册