Commit e7f52dfb authored by Lars Ellenberg, committed by Jens Axboe

drbd: revert "delay probes", feature is being re-implemented differently

It was a now abandoned attempt to throttle resync bandwidth
based on the delay it causes on the bulk data socket.
It has no user base yet, and has already been disabled by
9173465ccb51c09cc3102a10af93e9f469a0af6f.
This removes the now unused code.

The basic feature, namely using up "idle" bandwidth
of the network and disk IO subsystems with minimal impact
on application IO, is being reimplemented differently.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Parent 85f4cc17
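For reference, the throttling rule this revert removes interpolated the configured resync rate against the measured data-socket delay: full rate while the delay stays below throttle_th, zero once it reaches hold_off_th, and a linear ramp in between (see the deleted calc_resync_rate() in drbd_worker.c below). A minimal standalone sketch of that rule with illustrative numbers; the parameter values are examples, not defaults:

#include <stdio.h>

/* Mirror of the deleted calc_resync_rate(): data_delay_us is the measured
 * delay, throttle_th/hold_off_th are thresholds configured in 0.1 s units,
 * conf_rate is the configured resync rate in KB/s. */
static int calc_resync_rate(int data_delay_us, int throttle_th,
			    int hold_off_th, int conf_rate)
{
	int d  = data_delay_us / 1000;	/* us -> ms */
	int td = throttle_th * 100;	/* 0.1 s -> ms */
	int hd = hold_off_th * 100;	/* 0.1 s -> ms */

	return d <= td ? conf_rate :
	       d >= hd ? 0 :
	       conf_rate + (conf_rate * (td - d) / (hd - td));
}

int main(void)
{
	/* configured rate 10240 KB/s, throttle past 2 s, hold off at 10 s */
	printf("%d\n", calc_resync_rate(1000000, 20, 100, 10240));  /* 10240: below threshold */
	printf("%d\n", calc_resync_rate(6000000, 20, 100, 10240));  /* 5120: halfway down the ramp */
	printf("%d\n", calc_resync_rate(12000000, 20, 100, 10240)); /* 0: resync held off */
	return 0;
}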
@@ -550,12 +550,6 @@ struct p_delay_probe {
 	u32 offset;	/* usecs the probe got sent after the reference time point */
 } __packed;
 
-struct delay_probe {
-	struct list_head list;
-	unsigned int seq_num;
-	struct timeval time;
-};
-
 /* DCBP: Drbd Compressed Bitmap Packet ... */
 static inline enum drbd_bitmap_code
 DCBP_get_code(struct p_compressed_bm *p)
@@ -942,11 +936,9 @@ struct drbd_conf {
 	unsigned int ko_count;
 	struct drbd_work resync_work,
 			unplug_work,
-			md_sync_work,
-			delay_probe_work;
+			md_sync_work;
 	struct timer_list resync_timer;
 	struct timer_list md_sync_timer;
-	struct timer_list delay_probe_timer;
 
 	/* Used after attach while negotiating new disk state. */
 	union drbd_state new_state_tmp;
@@ -1062,12 +1054,6 @@ struct drbd_conf {
 	u64 ed_uuid; /* UUID of the exposed data */
 	struct mutex state_mutex;
 	char congestion_reason;  /* Why we where congested... */
-	struct list_head delay_probes; /* protected by peer_seq_lock */
-	int data_delay;   /* Delay of packets on the data-sock behind meta-sock */
-	unsigned int delay_seq; /* To generate sequence numbers of delay probes */
-	struct timeval dps_time; /* delay-probes-start-time */
-	unsigned int dp_volume_last; /* send_cnt of last delay probe */
-	int c_sync_rate; /* current resync rate after delay_probe magic */
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -2184,43 +2184,6 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
 	return ok;
 }
 
-static int drbd_send_delay_probe(struct drbd_conf *mdev, struct drbd_socket *ds)
-{
-	struct p_delay_probe dp;
-	int offset, ok = 0;
-	struct timeval now;
-
-	mutex_lock(&ds->mutex);
-	if (likely(ds->socket)) {
-		do_gettimeofday(&now);
-		offset = now.tv_usec - mdev->dps_time.tv_usec +
-			 (now.tv_sec - mdev->dps_time.tv_sec) * 1000000;
-		dp.seq_num = cpu_to_be32(mdev->delay_seq);
-		dp.offset  = cpu_to_be32(offset);
-
-		ok = _drbd_send_cmd(mdev, ds->socket, P_DELAY_PROBE,
-				    (struct p_header *)&dp, sizeof(dp), 0);
-	}
-	mutex_unlock(&ds->mutex);
-
-	return ok;
-}
-
-static int drbd_send_delay_probes(struct drbd_conf *mdev)
-{
-	int ok;
-
-	mdev->delay_seq++;
-	do_gettimeofday(&mdev->dps_time);
-	ok = drbd_send_delay_probe(mdev, &mdev->meta);
-	ok = ok && drbd_send_delay_probe(mdev, &mdev->data);
-
-	mdev->dp_volume_last = mdev->send_cnt;
-	mod_timer(&mdev->delay_probe_timer, jiffies + mdev->sync_conf.dp_interval * HZ / 10);
-
-	return ok;
-}
-
 /* called on sndtimeo
  * returns FALSE if we should retry,
  * TRUE if we think connection is dead
@@ -2369,27 +2332,6 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
 	return 1;
 }
 
-static void consider_delay_probes(struct drbd_conf *mdev)
-{
-	return;
-}
-
-static int w_delay_probes(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	if (!cancel && mdev->state.conn == C_SYNC_SOURCE)
-		drbd_send_delay_probes(mdev);
-
-	return 1;
-}
-
-static void delay_probe_timer_fn(unsigned long data)
-{
-	struct drbd_conf *mdev = (struct drbd_conf *) data;
-
-	if (list_empty(&mdev->delay_probe_work.list))
-		drbd_queue_work(&mdev->data.work, &mdev->delay_probe_work);
-}
-
 /* Used to send write requests
  * R_PRIMARY -> Peer	(P_DATA)
  */
@@ -2453,9 +2395,6 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
 	drbd_put_data_sock(mdev);
 
-	if (ok)
-		consider_delay_probes(mdev);
-
 	return ok;
 }
@@ -2502,9 +2441,6 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 	drbd_put_data_sock(mdev);
 
-	if (ok)
-		consider_delay_probes(mdev);
-
 	return ok;
 }
@@ -2666,10 +2602,6 @@ static void drbd_set_defaults(struct drbd_conf *mdev)
 		/* .rate = */		DRBD_RATE_DEF,
 		/* .after = */		DRBD_AFTER_DEF,
 		/* .al_extents = */	DRBD_AL_EXTENTS_DEF,
-		/* .dp_volume = */	DRBD_DP_VOLUME_DEF,
-		/* .dp_interval = */	DRBD_DP_INTERVAL_DEF,
-		/* .throttle_th = */	DRBD_RS_THROTTLE_TH_DEF,
-		/* .hold_off_th = */	DRBD_RS_HOLD_OFF_TH_DEF,
 		/* .verify_alg = */	{}, 0,
 		/* .cpu_mask = */	{}, 0,
 		/* .csums_alg = */	{}, 0,
@@ -2736,24 +2668,17 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
 	INIT_LIST_HEAD(&mdev->unplug_work.list);
 	INIT_LIST_HEAD(&mdev->md_sync_work.list);
 	INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
-	INIT_LIST_HEAD(&mdev->delay_probes);
-	INIT_LIST_HEAD(&mdev->delay_probe_work.list);
 
 	mdev->resync_work.cb  = w_resync_inactive;
 	mdev->unplug_work.cb  = w_send_write_hint;
 	mdev->md_sync_work.cb = w_md_sync;
 	mdev->bm_io_work.w.cb = w_bitmap_io;
-	mdev->delay_probe_work.cb = w_delay_probes;
 	init_timer(&mdev->resync_timer);
 	init_timer(&mdev->md_sync_timer);
-	init_timer(&mdev->delay_probe_timer);
 	mdev->resync_timer.function = resync_timer_fn;
 	mdev->resync_timer.data = (unsigned long) mdev;
 	mdev->md_sync_timer.function = md_sync_timer_fn;
 	mdev->md_sync_timer.data = (unsigned long) mdev;
-	mdev->delay_probe_timer.function = delay_probe_timer_fn;
-	mdev->delay_probe_timer.data = (unsigned long) mdev;
 	init_waitqueue_head(&mdev->misc_wait);
 	init_waitqueue_head(&mdev->state_wait);
@@ -1557,10 +1557,6 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 		sc.rate       = DRBD_RATE_DEF;
 		sc.after      = DRBD_AFTER_DEF;
 		sc.al_extents = DRBD_AL_EXTENTS_DEF;
-		sc.dp_volume  = DRBD_DP_VOLUME_DEF;
-		sc.dp_interval = DRBD_DP_INTERVAL_DEF;
-		sc.throttle_th = DRBD_RS_THROTTLE_TH_DEF;
-		sc.hold_off_th = DRBD_RS_HOLD_OFF_TH_DEF;
 	} else
 		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
@@ -73,21 +73,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 	seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10);
 	/* if more than 1 GB display in MB */
 	if (mdev->rs_total > 0x100000L)
-		seq_printf(seq, "(%lu/%lu)M",
+		seq_printf(seq, "(%lu/%lu)M\n\t",
 			(unsigned long) Bit2KB(rs_left >> 10),
 			(unsigned long) Bit2KB(mdev->rs_total >> 10));
 	else
-		seq_printf(seq, "(%lu/%lu)K",
+		seq_printf(seq, "(%lu/%lu)K\n\t",
 			(unsigned long) Bit2KB(rs_left),
 			(unsigned long) Bit2KB(mdev->rs_total));
 
-	if (mdev->state.conn == C_SYNC_TARGET)
-		seq_printf(seq, " queue_delay: %d.%d ms\n\t",
-			   mdev->data_delay / 1000,
-			   (mdev->data_delay % 1000) / 100);
-	else if (mdev->state.conn == C_SYNC_SOURCE)
-		seq_printf(seq, " delay_probe: %u\n\t", mdev->delay_seq);
-
 	/* see drivers/md/md.c
 	 * We do not want to overflow, so the order of operands and
 	 * the * 100 / 100 trick are important. We do a +1 to be
@@ -135,14 +128,6 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 	else
 		seq_printf(seq, " (%ld)", dbdt);
 
-	if (mdev->state.conn == C_SYNC_TARGET) {
-		if (mdev->c_sync_rate > 1000)
-			seq_printf(seq, " want: %d,%03d",
-				   mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
-		else
-			seq_printf(seq, " want: %d", mdev->c_sync_rate);
-	}
-
 	seq_printf(seq, " K/sec\n");
 }
@@ -3555,14 +3555,15 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h)
 	return ok;
 }
 
-static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
+static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent)
 {
 	/* TODO zero copy sink :) */
 	static char sink[128];
 	int size, want, r;
 
-	dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
-		 h->command, h->length);
+	if (!silent)
+		dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
+			 h->command, h->length);
 
 	size = h->length;
 	while (size > 0) {
@@ -3574,101 +3575,25 @@ static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
 	return size == 0;
 }
 
-static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
-{
-	if (mdev->state.disk >= D_INCONSISTENT)
-		drbd_kick_lo(mdev);
-
-	/* Make sure we've acked all the TCP data associated
-	 * with the data requests being unplugged */
-	drbd_tcp_quickack(mdev->data.socket);
-
-	return TRUE;
-}
-
-static void timeval_sub_us(struct timeval* tv, unsigned int us)
+static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
 {
-	tv->tv_sec -= us / 1000000;
-	us = us % 1000000;
-	if (tv->tv_usec > us) {
-		tv->tv_usec += 1000000;
-		tv->tv_sec--;
-	}
-	tv->tv_usec -= us;
+	return receive_skip_(mdev, h, 0);
 }
 
-static void got_delay_probe(struct drbd_conf *mdev, int from, struct p_delay_probe *p)
+static int receive_skip_silent(struct drbd_conf *mdev, struct p_header *h)
 {
-	struct delay_probe *dp;
-	struct list_head *le;
-	struct timeval now;
-	int seq_num;
-	int offset;
-	int data_delay;
-
-	seq_num = be32_to_cpu(p->seq_num);
-	offset  = be32_to_cpu(p->offset);
-
-	spin_lock(&mdev->peer_seq_lock);
-	if (!list_empty(&mdev->delay_probes)) {
-		if (from == USE_DATA_SOCKET)
-			le = mdev->delay_probes.next;
-		else
-			le = mdev->delay_probes.prev;
-
-		dp = list_entry(le, struct delay_probe, list);
-
-		if (dp->seq_num == seq_num) {
-			list_del(le);
-			spin_unlock(&mdev->peer_seq_lock);
-			do_gettimeofday(&now);
-			timeval_sub_us(&now, offset);
-			data_delay =
-				now.tv_usec - dp->time.tv_usec +
-				(now.tv_sec - dp->time.tv_sec) * 1000000;
-
-			if (data_delay > 0)
-				mdev->data_delay = data_delay;
-
-			kfree(dp);
-			return;
-		}
-
-		if (dp->seq_num > seq_num) {
-			spin_unlock(&mdev->peer_seq_lock);
-			dev_warn(DEV, "Previous allocation failure of struct delay_probe?\n");
-			return; /* Do not alloca a struct delay_probe.... */
-		}
-	}
-	spin_unlock(&mdev->peer_seq_lock);
-
-	dp = kmalloc(sizeof(struct delay_probe), GFP_NOIO);
-	if (!dp) {
-		dev_warn(DEV, "Failed to allocate a struct delay_probe, do not worry.\n");
-		return;
-	}
-
-	dp->seq_num = seq_num;
-	do_gettimeofday(&dp->time);
-	timeval_sub_us(&dp->time, offset);
-
-	spin_lock(&mdev->peer_seq_lock);
-	if (from == USE_DATA_SOCKET)
-		list_add(&dp->list, &mdev->delay_probes);
-	else
-		list_add_tail(&dp->list, &mdev->delay_probes);
-	spin_unlock(&mdev->peer_seq_lock);
+	return receive_skip_(mdev, h, 1);
 }
 
-static int receive_delay_probe(struct drbd_conf *mdev, struct p_header *h)
+static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
 {
-	struct p_delay_probe *p = (struct p_delay_probe *)h;
+	if (mdev->state.disk >= D_INCONSISTENT)
+		drbd_kick_lo(mdev);
 
-	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
-	if (drbd_recv(mdev, h->payload, h->length) != h->length)
-		return FALSE;
+	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
+	drbd_tcp_quickack(mdev->data.socket);
 
-	got_delay_probe(mdev, USE_DATA_SOCKET, p);
 	return TRUE;
 }
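For context, the mechanism deleted here measured delay as follows: a probe pair sharing one sequence number was sent on the meta and the data socket, each packet carrying in its offset field the microseconds elapsed since a common reference point; got_delay_probe() subtracted that offset from each arrival time and took the difference of the two corrected timestamps as the data socket's extra queueing delay. A simplified userspace model of that calculation; the struct layout and numbers below are illustrative, not the on-wire format:

#include <stdio.h>

struct probe { unsigned int seq_num; long arrival_us; long offset_us; };

/* extra delay of the data socket behind the meta socket, in usecs */
static long data_delay_us(const struct probe *meta, const struct probe *data)
{
	if (meta->seq_num != data->seq_num)
		return -1; /* unmatched pair: no estimate */
	return (data->arrival_us - data->offset_us) -
	       (meta->arrival_us - meta->offset_us);
}

int main(void)
{
	/* the meta probe arrives promptly; the data probe sat behind
	 * bulk resync traffic for roughly 250 ms */
	struct probe meta = { .seq_num = 7, .arrival_us = 1000,   .offset_us = 10 };
	struct probe data = { .seq_num = 7, .arrival_us = 251500, .offset_us = 500 };

	printf("data_delay: %ld us\n", data_delay_us(&meta, &data));
	return 0;
}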
@@ -3695,7 +3620,7 @@ static drbd_cmd_handler_f drbd_default_handler[] = {
 	[P_OV_REQUEST]      = receive_DataRequest,
 	[P_OV_REPLY]        = receive_DataRequest,
 	[P_CSUM_RS_REQUEST] = receive_DataRequest,
-	[P_DELAY_PROBE]     = receive_delay_probe,
+	[P_DELAY_PROBE]     = receive_skip_silent,
 	/* anything missing from this table is in
	 * the asender_tbl, see get_asender_cmd */
 	[P_MAX_CMD]	    = NULL,
@@ -4472,11 +4397,9 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h)
 	return TRUE;
 }
 
-static int got_delay_probe_m(struct drbd_conf *mdev, struct p_header *h)
+static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h)
 {
-	struct p_delay_probe *p = (struct p_delay_probe *)h;
-
-	got_delay_probe(mdev, USE_META_SOCKET, p);
+	/* IGNORE */
 	return TRUE;
 }
@@ -4504,7 +4427,7 @@ static struct asender_cmd *get_asender_cmd(int cmd)
 	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
-	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe), got_delay_probe_m },
+	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe), got_something_to_ignore_m },
 	[P_MAX_CMD]	    = { 0, NULL },
 	};
 	if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
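Note the compatibility pattern used in both dispatch tables: P_DELAY_PROBE stays mapped, but to handlers that merely drain or ignore the payload, so an unpatched 8.3.8 peer can keep sending probes without filling the log with "unknown packet" warnings. A minimal userspace sketch of that silent-skip dispatch, assuming made-up packet-type values and a stubbed drain:

#include <stdio.h>

struct p_header { int command; int length; };

typedef int (*cmd_handler_f)(struct p_header *h);

/* drain h->length payload bytes; warn only when not silent */
static int receive_skip_(struct p_header *h, int silent)
{
	if (!silent)
		fprintf(stderr, "skipping unknown optional packet type %d, l: %d!\n",
			h->command, h->length);
	return 1; /* the real code reads the payload into a sink buffer */
}

static int receive_skip(struct p_header *h)        { return receive_skip_(h, 0); }
static int receive_skip_silent(struct p_header *h) { return receive_skip_(h, 1); }

enum { P_DELAY_PROBE = 0, P_MAX_CMD }; /* values illustrative, not DRBD's enum */

static cmd_handler_f handler[] = {
	[P_DELAY_PROBE] = receive_skip_silent, /* known but ignored: no warning */
	[P_MAX_CMD]     = receive_skip,        /* truly unknown: warn */
};

int main(void)
{
	struct p_header probe = { .command = P_DELAY_PROBE, .length = 8 };
	handler[P_DELAY_PROBE](&probe); /* quietly drained */
	return 0;
}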
@@ -424,18 +424,6 @@ void resync_timer_fn(unsigned long data)
 		drbd_queue_work(&mdev->data.work, &mdev->resync_work);
 }
 
-static int calc_resync_rate(struct drbd_conf *mdev)
-{
-	int d = mdev->data_delay / 1000; /* us -> ms */
-	int td = mdev->sync_conf.throttle_th * 100;  /* 0.1s -> ms */
-	int hd = mdev->sync_conf.hold_off_th * 100;  /* 0.1s -> ms */
-	int cr = mdev->sync_conf.rate;
-
-	return d <= td ? cr :
-		d >= hd ? 0 :
-		cr + (cr * (td - d) / (hd - td));
-}
-
 int w_make_resync_request(struct drbd_conf *mdev,
 		struct drbd_work *w, int cancel)
 {
@@ -473,8 +461,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
 	max_segment_size = mdev->agreed_pro_version < 94 ?
 		queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
 
-	mdev->c_sync_rate = calc_resync_rate(mdev);
-	number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
+	number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE / 1024) * HZ);
 	pe = atomic_read(&mdev->rs_pending_cnt);
 
 	mutex_lock(&mdev->data.mutex);
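With the probe magic gone, pacing again derives directly from the configured rate: every SLEEP_TIME tick, w_make_resync_request() issues "number" requests of BM_BLOCK_SIZE each. A quick standalone check of that arithmetic; SLEEP_TIME = HZ/10, BM_BLOCK_SIZE = 4096 and the 250 KB/s default rate are assumptions taken from the 8.3.x sources:

#include <stdio.h>

#define HZ            250
#define SLEEP_TIME    (HZ / 10)	/* pacing interval: 100 ms */
#define BM_BLOCK_SIZE 4096	/* one bitmap bit covers 4 KiB */

int main(void)
{
	int rate = 250; /* configured sync rate in KB/s */

	/* resync requests issued per 100 ms interval */
	int number = SLEEP_TIME * rate / ((BM_BLOCK_SIZE / 1024) * HZ);

	/* 6 requests * 4 KiB * 10 intervals/s = 240 KB/s, i.e. the
	 * configured rate rounded down to whole requests */
	printf("requests per interval: %d (%d KB/s effective)\n",
	       number, number * (BM_BLOCK_SIZE / 1024) * 10);
	return 0;
}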
@@ -53,7 +53,7 @@
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.8"
+#define REL_VERSION "8.3.8.1"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 94
@@ -78,10 +78,11 @@ NL_PACKET(syncer_conf, 8,
 	NL_INTEGER(	30,	T_MAY_IGNORE,	rate)
 	NL_INTEGER(	31,	T_MAY_IGNORE,	after)
 	NL_INTEGER(	32,	T_MAY_IGNORE,	al_extents)
-	NL_INTEGER(	71,	T_MAY_IGNORE,	dp_volume)
-	NL_INTEGER(	72,	T_MAY_IGNORE,	dp_interval)
-	NL_INTEGER(	73,	T_MAY_IGNORE,	throttle_th)
-	NL_INTEGER(	74,	T_MAY_IGNORE,	hold_off_th)
+/*	NL_INTEGER(	71,	T_MAY_IGNORE,	dp_volume)
+ *	NL_INTEGER(	72,	T_MAY_IGNORE,	dp_interval)
+ *	NL_INTEGER(	73,	T_MAY_IGNORE,	throttle_th)
+ *	NL_INTEGER(	74,	T_MAY_IGNORE,	hold_off_th)
+ *	feature will be reimplemented differently with 8.3.9 */
 	NL_STRING(	52,	T_MAY_IGNORE,	verify_alg,	SHARED_SECRET_MAX)
 	NL_STRING(	51,	T_MAY_IGNORE,	cpu_mask,	32)
 	NL_STRING(	64,	T_MAY_IGNORE,	csums_alg,	SHARED_SECRET_MAX)