提交 ac33d071 编写于 作者: P Patrick Caulfield 提交者: Steven Whitehouse

[DLM] Clean up lowcomms

This fixes up most of the things pointed out by akpm and Pavel Machek
with comments below indicating why some things have been left:

Andrew Morton wrote:
>
>> +static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
>> +{
>> +	struct nodeinfo *ni;
>> +	int r;
>> +	int n;
>> +
>> +	down_read(&nodeinfo_lock);
>
> Given that this function can sleep, I wonder if `alloc' is useful.
>
> I see lots of callers passing in a literal "0" for `alloc'.  That's in fact
> a secret (GFP_ATOMIC & ~__GFP_HIGH).  I doubt if that's what you really
> meant.  Particularly as the code could at least have used __GFP_WAIT (aka
> GFP_NOIO) which is much, much more reliable than "0".  In fact "0" is the
> least reliable mode possible.
>
> IOW, this is all bollixed up.

When 0 is passed into nodeid2nodeinfo the function does not try to allocate a
new structure at all. It's an indication that the caller only wants the nodeinfo
struct for that nodeid if there actually is one in existence.
I've tidied the function itself so it's more obvious (and tidier!).

>> +/* Data received from remote end */
>> +static int receive_from_sock(void)
>> +{
>> +	int ret = 0;
>> +	struct msghdr msg;
>> +	struct kvec iov[2];
>> +	unsigned len;
>> +	int r;
>> +	struct sctp_sndrcvinfo *sinfo;
>> +	struct cmsghdr *cmsg;
>> +	struct nodeinfo *ni;
>> +
>> +	/* These two are marginally too big for stack allocation, but this
>> +	 * function is (currently) only called by dlm_recvd so static should be
>> +	 * OK.
>> +	 */
>> +	static struct sockaddr_storage msgname;
>> +	static char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
>
> whoa.  This is globally singly-threaded code??

Yes. It is only ever run in the context of dlm_recvd.
>>
>> +static void initiate_association(int nodeid)
>> +{
>> +	struct sockaddr_storage rem_addr;
>> +	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
>
> Another static buffer to worry about.  Globally singly-threaded code?

Yes. Only ever called by dlm_sendd.

>> +
>> +/* Send a message */
>> +static int send_to_sock(struct nodeinfo *ni)
>> +{
>> +	int ret = 0;
>> +	struct writequeue_entry *e;
>> +	int len, offset;
>> +	struct msghdr outmsg;
>> +	static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
>
> Singly-threaded?

Yep.

>>
>> +static void dealloc_nodeinfo(void)
>> +{
>> +	int i;
>> +
>> +	for (i=1; i<=max_nodeid; i++) {
>> +		struct nodeinfo *ni = nodeid2nodeinfo(i, 0);
>> +		if (ni) {
>> +			idr_remove(&nodeinfo_idr, i);
>
> Didn't that need locking?

No. It's only ever called at DLM shutdown after all the other threads
have been stopped.

>>
>> +static int write_list_empty(void)
>> +{
>> +	int status;
>> +
>> +	spin_lock_bh(&write_nodes_lock);
>> +	status = list_empty(&write_nodes);
>> +	spin_unlock_bh(&write_nodes_lock);
>> +
>> +	return status;
>> +}
>
> This function's return value is meaningless.  As soon as the lock gets
> dropped, the return value can get out of sync with reality.
>
> Looking at the caller, this _might_ happen to be OK, but it's a nasty and
> dangerous thing.  Really the locking should be moved into the caller.

It's just an optimisation to allow the caller to schedule if there is no work
to do. If something arrives immediately afterwards then it will get picked up
when the process re-awakes (and it will be woken by that arrival).

The 'accepting' atomic has gone completely. As Andrew pointed out, it didn't
really achieve much anyway. I suspect it was a plaster over some other
startup or shutdown bug, to be honest.
Signed-off-by: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Pavel Machek <pavel@ucw.cz>
上级 34126f9f
......@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
......@@ -75,13 +75,13 @@ struct nodeinfo {
};
static DEFINE_IDR(nodeinfo_idr);
static struct rw_semaphore nodeinfo_lock;
static DECLARE_RWSEM(nodeinfo_lock);
static int max_nodeid;
struct cbuf {
unsigned base;
unsigned len;
unsigned mask;
unsigned int base;
unsigned int len;
unsigned int mask;
};
/* Just the one of these, now. But this struct keeps
......@@ -110,28 +110,32 @@ struct writequeue_entry {
struct nodeinfo *ni;
};
#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
#define CBUF_EMPTY(cb) ((cb)->len == 0)
#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
/* Grow the amount of data held in the circular buffer by n. */
static void cbuf_add(struct cbuf *cb, int n)
{
	cb->len = cb->len + n;
}
#define CBUF_INIT(cb, size) \
do { \
(cb)->base = (cb)->len = 0; \
(cb)->mask = ((size)-1); \
} while(0)
/*
 * Return the offset of the current end point of the data, i.e.
 * (base + len) wrapped by the buffer mask.
 */
static int cbuf_data(struct cbuf *cb)
{
	unsigned int end = cb->base + cb->len;

	return end & cb->mask;
}
#define CBUF_EAT(cb, n) \
do { \
(cb)->len -= (n); \
(cb)->base += (n); \
(cb)->base &= (cb)->mask; \
} while(0)
/*
 * Reset the circular buffer to empty and set its wrap mask to size - 1.
 * NOTE(review): the mask only wraps correctly if size is a power of two -
 * confirm against callers.
 */
static void cbuf_init(struct cbuf *cb, int size)
{
	cb->len = 0;
	cb->base = 0;
	cb->mask = size - 1;
}
/*
 * Consume n from the front of the circular buffer: the start offset moves
 * forward by n (wrapped by the mask) and the held length shrinks by n.
 */
static void cbuf_eat(struct cbuf *cb, int n)
{
	cb->base = (cb->base + n) & cb->mask;
	cb->len -= n;
}
/* List of nodes which have writes pending */
static struct list_head write_nodes;
static spinlock_t write_nodes_lock;
static LIST_HEAD(write_nodes);
static DEFINE_SPINLOCK(write_nodes_lock);
/* Maximum number of incoming messages to process before
* doing a schedule()
......@@ -141,8 +145,7 @@ static spinlock_t write_nodes_lock;
/* Manage daemons */
static struct task_struct *recv_task;
static struct task_struct *send_task;
static wait_queue_head_t lowcomms_recv_wait;
static atomic_t accepting;
static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait);
/* The SCTP connection */
static struct connection sctp_con;
......@@ -174,6 +177,8 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
return 0;
}
/* If alloc is 0 here we will not attempt to allocate a new
nodeinfo struct */
static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
{
struct nodeinfo *ni;
......@@ -184,7 +189,9 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
ni = idr_find(&nodeinfo_idr, nodeid);
up_read(&nodeinfo_lock);
if (!ni && alloc) {
if (ni || !alloc)
return ni;
down_write(&nodeinfo_lock);
ni = idr_find(&nodeinfo_idr, nodeid);
......@@ -219,9 +226,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
if (nodeid > max_nodeid)
max_nodeid = nodeid;
out_up:
out_up:
up_write(&nodeinfo_lock);
}
return ni;
}
......@@ -324,7 +330,7 @@ static void send_shutdown(sctp_assoc_t associd)
cmsg->cmsg_type = SCTP_SNDRCV;
cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
outmessage.msg_controllen = cmsg->cmsg_len;
sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
sinfo = CMSG_DATA(cmsg);
memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
sinfo->sinfo_flags |= MSG_EOF;
......@@ -398,15 +404,18 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
fs = get_fs();
set_fs(get_ds());
ret = sctp_con.sock->ops->getsockopt(sctp_con.sock,
IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
(char*)&prim, &prim_len);
IPPROTO_SCTP,
SCTP_PRIMARY_ADDR,
(char*)&prim,
&prim_len);
set_fs(fs);
if (ret < 0) {
struct nodeinfo *ni;
log_print("getsockopt/sctp_primary_addr on "
"new assoc %d failed : %d",
(int)sn->sn_assoc_change.sac_assoc_id, ret);
(int)sn->sn_assoc_change.sac_assoc_id,
ret);
/* Retry INIT later */
ni = assoc2nodeinfo(sn->sn_assoc_change.sac_assoc_id);
......@@ -426,9 +435,7 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
return;
/* Save the assoc ID */
spin_lock(&ni->lock);
ni->assoc_id = sn->sn_assoc_change.sac_assoc_id;
spin_unlock(&ni->lock);
log_print("got new/restarted association %d nodeid %d",
(int)sn->sn_assoc_change.sac_assoc_id, nodeid);
......@@ -507,13 +514,12 @@ static int receive_from_sock(void)
sctp_con.rx_page = alloc_page(GFP_ATOMIC);
if (sctp_con.rx_page == NULL)
goto out_resched;
CBUF_INIT(&sctp_con.cb, PAGE_CACHE_SIZE);
cbuf_init(&sctp_con.cb, PAGE_CACHE_SIZE);
}
memset(&incmsg, 0, sizeof(incmsg));
memset(&msgname, 0, sizeof(msgname));
memset(incmsg, 0, sizeof(incmsg));
msg.msg_name = &msgname;
msg.msg_namelen = sizeof(msgname);
msg.msg_flags = 0;
......@@ -532,17 +538,17 @@ static int receive_from_sock(void)
* iov[0] is the bit of the circular buffer between the current end
* point (cb.base + cb.len) and the end of the buffer.
*/
iov[0].iov_len = sctp_con.cb.base - CBUF_DATA(&sctp_con.cb);
iov[0].iov_len = sctp_con.cb.base - cbuf_data(&sctp_con.cb);
iov[0].iov_base = page_address(sctp_con.rx_page) +
CBUF_DATA(&sctp_con.cb);
cbuf_data(&sctp_con.cb);
iov[1].iov_len = 0;
/*
* iov[1] is the bit of the circular buffer between the start of the
* buffer and the start of the currently used section (cb.base)
*/
if (CBUF_DATA(&sctp_con.cb) >= sctp_con.cb.base) {
iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&sctp_con.cb);
if (cbuf_data(&sctp_con.cb) >= sctp_con.cb.base) {
iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&sctp_con.cb);
iov[1].iov_len = sctp_con.cb.base;
iov[1].iov_base = page_address(sctp_con.rx_page);
msg.msg_iovlen = 2;
......@@ -557,7 +563,7 @@ static int receive_from_sock(void)
msg.msg_control = incmsg;
msg.msg_controllen = sizeof(incmsg);
cmsg = CMSG_FIRSTHDR(&msg);
sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
sinfo = CMSG_DATA(cmsg);
if (msg.msg_flags & MSG_NOTIFICATION) {
process_sctp_notification(&msg, page_address(sctp_con.rx_page));
......@@ -583,29 +589,29 @@ static int receive_from_sock(void)
if (r == 1)
return 0;
CBUF_ADD(&sctp_con.cb, ret);
cbuf_add(&sctp_con.cb, ret);
ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
page_address(sctp_con.rx_page),
sctp_con.cb.base, sctp_con.cb.len,
PAGE_CACHE_SIZE);
if (ret < 0)
goto out_close;
CBUF_EAT(&sctp_con.cb, ret);
cbuf_eat(&sctp_con.cb, ret);
out:
out:
ret = 0;
goto out_ret;
out_resched:
out_resched:
lowcomms_data_ready(sctp_con.sock->sk, 0);
ret = 0;
schedule();
cond_resched();
goto out_ret;
out_close:
out_close:
if (ret != -EAGAIN)
log_print("error reading from sctp socket: %d", ret);
out_ret:
out_ret:
return ret;
}
......@@ -619,10 +625,12 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
set_fs(get_ds());
if (num == 1)
result = sctp_con.sock->ops->bind(sctp_con.sock,
(struct sockaddr *) addr, addr_len);
(struct sockaddr *) addr,
addr_len);
else
result = sctp_con.sock->ops->setsockopt(sctp_con.sock, SOL_SCTP,
SCTP_SOCKOPT_BINDX_ADD, (char *)addr, addr_len);
SCTP_SOCKOPT_BINDX_ADD,
(char *)addr, addr_len);
set_fs(fs);
if (result < 0)
......@@ -719,10 +727,10 @@ static int init_sock(void)
return 0;
create_delsock:
create_delsock:
sock_release(sock);
sctp_con.sock = NULL;
out:
out:
return result;
}
......@@ -756,16 +764,13 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
int users = 0;
struct nodeinfo *ni;
if (!atomic_read(&accepting))
return NULL;
ni = nodeid2nodeinfo(nodeid, allocation);
if (!ni)
return NULL;
spin_lock(&ni->writequeue_lock);
e = list_entry(ni->writequeue.prev, struct writequeue_entry, list);
if (((struct list_head *) e == &ni->writequeue) ||
if ((&e->list == &ni->writequeue) ||
(PAGE_CACHE_SIZE - e->end < len)) {
e = NULL;
} else {
......@@ -803,9 +808,6 @@ void dlm_lowcomms_commit_buffer(void *arg)
int users;
struct nodeinfo *ni = e->ni;
if (!atomic_read(&accepting))
return;
spin_lock(&ni->writequeue_lock);
users = --e->users;
if (users)
......@@ -822,7 +824,7 @@ void dlm_lowcomms_commit_buffer(void *arg)
}
return;
out:
out:
spin_unlock(&ni->writequeue_lock);
return;
}
......@@ -878,7 +880,7 @@ static void initiate_association(int nodeid)
cmsg->cmsg_level = IPPROTO_SCTP;
cmsg->cmsg_type = SCTP_SNDRCV;
cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
sinfo = CMSG_DATA(cmsg);
memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
......@@ -892,7 +894,7 @@ static void initiate_association(int nodeid)
}
/* Send a message */
static int send_to_sock(struct nodeinfo *ni)
static void send_to_sock(struct nodeinfo *ni)
{
int ret = 0;
struct writequeue_entry *e;
......@@ -909,7 +911,7 @@ static int send_to_sock(struct nodeinfo *ni)
if (!ni->assoc_id && !test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
spin_unlock(&ni->lock);
initiate_association(ni->nodeid);
return 0;
return;
}
spin_unlock(&ni->lock);
......@@ -923,7 +925,7 @@ static int send_to_sock(struct nodeinfo *ni)
cmsg->cmsg_level = IPPROTO_SCTP;
cmsg->cmsg_type = SCTP_SNDRCV;
cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
sinfo = CMSG_DATA(cmsg);
memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo));
sinfo->sinfo_ppid = cpu_to_le32(dlm_local_nodeid);
sinfo->sinfo_assoc_id = ni->assoc_id;
......@@ -955,7 +957,7 @@ static int send_to_sock(struct nodeinfo *ni)
goto send_error;
} else {
/* Don't starve people filling buffers */
schedule();
cond_resched();
}
spin_lock(&ni->writequeue_lock);
......@@ -964,15 +966,16 @@ static int send_to_sock(struct nodeinfo *ni)
if (e->len == 0 && e->users == 0) {
list_del(&e->list);
kunmap(e->page);
free_entry(e);
continue;
}
}
spin_unlock(&ni->writequeue_lock);
out:
return ret;
out:
return;
send_error:
send_error:
log_print("Error sending to node %d %d", ni->nodeid, ret);
spin_lock(&ni->lock);
if (!test_and_set_bit(NI_INIT_PENDING, &ni->flags)) {
......@@ -982,7 +985,7 @@ static int send_to_sock(struct nodeinfo *ni)
} else
spin_unlock(&ni->lock);
return ret;
return;
}
/* Try to send any messages that are pending */
......@@ -1106,7 +1109,7 @@ static int dlm_recvd(void *data)
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&lowcomms_recv_wait, &wait);
if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
schedule();
cond_resched();
remove_wait_queue(&lowcomms_recv_wait, &wait);
set_current_state(TASK_RUNNING);
......@@ -1118,12 +1121,12 @@ static int dlm_recvd(void *data)
/* Don't starve out everyone else */
if (++count >= MAX_RX_MSG_COUNT) {
schedule();
cond_resched();
count = 0;
}
} while (!kthread_should_stop() && ret >=0);
}
schedule();
cond_resched();
}
return 0;
......@@ -1138,7 +1141,7 @@ static int dlm_sendd(void *data)
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
if (write_list_empty())
schedule();
cond_resched();
set_current_state(TASK_RUNNING);
if (sctp_con.eagain_flag) {
......@@ -1197,43 +1200,28 @@ int dlm_lowcomms_start(void)
error = daemons_start();
if (error)
goto fail_sock;
atomic_set(&accepting, 1);
return 0;
fail_sock:
fail_sock:
close_connection();
return error;
}
/* Set all the activity flags to prevent any socket activity. */
void dlm_lowcomms_stop(void)
{
atomic_set(&accepting, 0);
int i;
sctp_con.flags = 0x7;
daemons_stop();
clean_writequeues();
close_connection();
dealloc_nodeinfo();
max_nodeid = 0;
}
int dlm_lowcomms_init(void)
{
init_waitqueue_head(&lowcomms_recv_wait);
spin_lock_init(&write_nodes_lock);
INIT_LIST_HEAD(&write_nodes);
init_rwsem(&nodeinfo_lock);
return 0;
}
void dlm_lowcomms_exit(void)
{
int i;
dlm_local_count = 0;
dlm_local_nodeid = 0;
for (i = 0; i < dlm_local_count; i++)
kfree(dlm_local_addr[i]);
dlm_local_count = 0;
dlm_local_nodeid = 0;
}
......@@ -54,27 +54,42 @@
#include "config.h"
struct cbuf {
unsigned base;
unsigned len;
unsigned mask;
unsigned int base;
unsigned int len;
unsigned int mask;
};
#ifndef FALSE
#define FALSE 0
#define TRUE 1
#endif
#define NODE_INCREMENT 32
/* Grow the amount of data held in the circular buffer by n. */
static void cbuf_add(struct cbuf *cb, int n)
{
	cb->len = cb->len + n;
}
/*
 * Return the offset of the current end point of the data, i.e.
 * (base + len) wrapped by the buffer mask.
 */
static int cbuf_data(struct cbuf *cb)
{
	unsigned int end = cb->base + cb->len;

	return end & cb->mask;
}
/*
 * Reset the circular buffer to empty and set its wrap mask to size - 1.
 * NOTE(review): the mask only wraps correctly if size is a power of two -
 * confirm against callers.
 */
static void cbuf_init(struct cbuf *cb, int size)
{
	cb->len = 0;
	cb->base = 0;
	cb->mask = size - 1;
}
/*
 * Consume n from the front of the circular buffer: the start offset moves
 * forward by n (wrapped by the mask) and the held length shrinks by n.
 */
static void cbuf_eat(struct cbuf *cb, int n)
{
	cb->base = (cb->base + n) & cb->mask;
	cb->len -= n;
}
#define CBUF_INIT(cb, size) do { (cb)->base = (cb)->len = 0; (cb)->mask = ((size)-1); } while(0)
#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0)
#define CBUF_EMPTY(cb) ((cb)->len == 0)
#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
#define CBUF_EAT(cb, n) do { (cb)->len -= (n); \
(cb)->base += (n); (cb)->base &= (cb)->mask; } while(0)
#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
/* Return true when the circular buffer currently holds no data. */
static bool cbuf_empty(struct cbuf *cb)
{
	return !cb->len;
}
/* Maximum number of incoming messages to process before
doing a schedule()
doing a cond_resched()
*/
#define MAX_RX_MSG_COUNT 25
......@@ -121,28 +136,27 @@ static struct task_struct *recv_task;
static struct task_struct *send_task;
static wait_queue_t lowcomms_send_waitq_head;
static wait_queue_head_t lowcomms_send_waitq;
static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq);
static wait_queue_t lowcomms_recv_waitq_head;
static wait_queue_head_t lowcomms_recv_waitq;
static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
/* An array of pointers to connections, indexed by NODEID */
static struct connection **connections;
static struct semaphore connections_lock;
static DECLARE_MUTEX(connections_lock);
static kmem_cache_t *con_cache;
static int conn_array_size;
static atomic_t accepting;
/* List of sockets that have reads pending */
static struct list_head read_sockets;
static spinlock_t read_sockets_lock;
static LIST_HEAD(read_sockets);
static DEFINE_SPINLOCK(read_sockets_lock);
/* List of sockets which have writes pending */
static struct list_head write_sockets;
static spinlock_t write_sockets_lock;
static LIST_HEAD(write_sockets);
static DEFINE_SPINLOCK(write_sockets_lock);
/* List of sockets which have connects pending */
static struct list_head state_sockets;
static spinlock_t state_sockets_lock;
static LIST_HEAD(state_sockets);
static DEFINE_SPINLOCK(state_sockets_lock);
static struct connection *nodeid2con(int nodeid, gfp_t allocation)
{
......@@ -153,12 +167,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
int new_size = nodeid + NODE_INCREMENT;
struct connection **new_conns;
new_conns = kmalloc(sizeof(struct connection *) *
new_conns = kzalloc(sizeof(struct connection *) *
new_size, allocation);
if (!new_conns)
goto finish;
memset(new_conns, 0, sizeof(struct connection *) * new_size);
memcpy(new_conns, connections, sizeof(struct connection *) * conn_array_size);
conn_array_size = new_size;
kfree(connections);
......@@ -168,11 +181,10 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
con = connections[nodeid];
if (con == NULL && allocation) {
con = kmem_cache_alloc(con_cache, allocation);
con = kmem_cache_zalloc(con_cache, allocation);
if (!con)
goto finish;
memset(con, 0, sizeof(*con));
con->nodeid = nodeid;
init_rwsem(&con->sock_sem);
INIT_LIST_HEAD(&con->writequeue);
......@@ -181,7 +193,7 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
connections[nodeid] = con;
}
finish:
finish:
up(&connections_lock);
return con;
}
......@@ -220,8 +232,6 @@ static inline void lowcomms_connect_sock(struct connection *con)
{
if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
return;
if (!atomic_read(&accepting))
return;
spin_lock_bh(&state_sockets_lock);
list_add_tail(&con->state_list, &state_sockets);
......@@ -232,31 +242,8 @@ static inline void lowcomms_connect_sock(struct connection *con)
static void lowcomms_state_change(struct sock *sk)
{
/* struct connection *con = sock2con(sk); */
switch (sk->sk_state) {
case TCP_ESTABLISHED:
if (sk->sk_state == TCP_ESTABLISHED)
lowcomms_write_space(sk);
break;
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
case TCP_TIME_WAIT:
case TCP_CLOSE:
case TCP_CLOSE_WAIT:
case TCP_LAST_ACK:
case TCP_CLOSING:
/* FIXME: I think this causes more trouble than it solves.
lowcomms wil reconnect anyway when there is something to
send. This just attempts reconnection if a node goes down!
*/
/* lowcomms_connect_sock(con); */
break;
default:
printk("dlm: lowcomms_state_change: state=%d\n", sk->sk_state);
break;
}
}
/* Make a socket active */
......@@ -282,8 +269,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
in4_addr->sin_port = cpu_to_be16(port);
*addr_len = sizeof(struct sockaddr_in);
}
else {
} else {
struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)saddr;
in6_addr->sin6_port = cpu_to_be16(port);
*addr_len = sizeof(struct sockaddr_in6);
......@@ -291,7 +277,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
}
/* Close a remote connection and tidy up */
static void close_connection(struct connection *con, int and_other)
static void close_connection(struct connection *con, bool and_other)
{
down_write(&con->sock_sem);
......@@ -300,11 +286,8 @@ static void close_connection(struct connection *con, int and_other)
con->sock = NULL;
}
if (con->othercon && and_other) {
/* Argh! recursion in kernel code!
Actually, this isn't a list so it
will only re-enter once.
*/
close_connection(con->othercon, FALSE);
/* Will only re-enter once. */
close_connection(con->othercon, false);
}
if (con->rx_page) {
__free_page(con->rx_page);
......@@ -337,7 +320,7 @@ static int receive_from_sock(struct connection *con)
con->rx_page = alloc_page(GFP_ATOMIC);
if (con->rx_page == NULL)
goto out_resched;
CBUF_INIT(&con->cb, PAGE_CACHE_SIZE);
cbuf_init(&con->cb, PAGE_CACHE_SIZE);
}
msg.msg_control = NULL;
......@@ -352,16 +335,16 @@ static int receive_from_sock(struct connection *con)
* iov[0] is the bit of the circular buffer between the current end
* point (cb.base + cb.len) and the end of the buffer.
*/
iov[0].iov_len = con->cb.base - CBUF_DATA(&con->cb);
iov[0].iov_base = page_address(con->rx_page) + CBUF_DATA(&con->cb);
iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
iov[1].iov_len = 0;
/*
* iov[1] is the bit of the circular buffer between the start of the
* buffer and the start of the currently used section (cb.base)
*/
if (CBUF_DATA(&con->cb) >= con->cb.base) {
iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&con->cb);
if (cbuf_data(&con->cb) >= con->cb.base) {
iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
iov[1].iov_len = con->cb.base;
iov[1].iov_base = page_address(con->rx_page);
msg.msg_iovlen = 2;
......@@ -378,7 +361,7 @@ static int receive_from_sock(struct connection *con)
goto out_close;
if (ret == len)
call_again_soon = 1;
CBUF_ADD(&con->cb, ret);
cbuf_add(&con->cb, ret);
ret = dlm_process_incoming_buffer(con->nodeid,
page_address(con->rx_page),
con->cb.base, con->cb.len,
......@@ -391,35 +374,32 @@ static int receive_from_sock(struct connection *con)
}
if (ret < 0)
goto out_close;
CBUF_EAT(&con->cb, ret);
cbuf_eat(&con->cb, ret);
if (CBUF_EMPTY(&con->cb) && !call_again_soon) {
if (cbuf_empty(&con->cb) && !call_again_soon) {
__free_page(con->rx_page);
con->rx_page = NULL;
}
out:
out:
if (call_again_soon)
goto out_resched;
up_read(&con->sock_sem);
ret = 0;
goto out_ret;
return 0;
out_resched:
out_resched:
lowcomms_data_ready(con->sock->sk, 0);
up_read(&con->sock_sem);
ret = 0;
schedule();
goto out_ret;
cond_resched();
return 0;
out_close:
out_close:
up_read(&con->sock_sem);
if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
close_connection(con, FALSE);
close_connection(con, false);
/* Reconnect when there is something to send */
}
out_ret:
return ret;
}
......@@ -434,7 +414,8 @@ static int accept_from_sock(struct connection *con)
struct connection *newcon;
memset(&peeraddr, 0, sizeof(peeraddr));
result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &newsock);
result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
IPPROTO_TCP, &newsock);
if (result < 0)
return -ENOMEM;
......@@ -486,14 +467,13 @@ static int accept_from_sock(struct connection *con)
struct connection *othercon = newcon->othercon;
if (!othercon) {
othercon = kmem_cache_alloc(con_cache, GFP_KERNEL);
othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
if (!othercon) {
printk("dlm: failed to allocate incoming socket\n");
up_write(&newcon->sock_sem);
result = -ENOMEM;
goto accept_err;
}
memset(othercon, 0, sizeof(*othercon));
othercon->nodeid = nodeid;
othercon->rx_action = receive_from_sock;
init_rwsem(&othercon->sock_sem);
......@@ -523,7 +503,7 @@ static int accept_from_sock(struct connection *con)
return 0;
accept_err:
accept_err:
up_read(&con->sock_sem);
sock_release(newsock);
......@@ -533,7 +513,7 @@ static int accept_from_sock(struct connection *con)
}
/* Connect a new socket to its peer */
static int connect_to_sock(struct connection *con)
static void connect_to_sock(struct connection *con)
{
int result = -EHOSTUNREACH;
struct sockaddr_storage saddr;
......@@ -542,7 +522,7 @@ static int connect_to_sock(struct connection *con)
if (con->nodeid == 0) {
log_print("attempt to connect sock 0 foiled");
return 0;
return;
}
down_write(&con->sock_sem);
......@@ -556,7 +536,8 @@ static int connect_to_sock(struct connection *con)
}
/* Create a socket to communicate with */
result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &sock);
result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
IPPROTO_TCP, &sock);
if (result < 0)
goto out_err;
......@@ -577,19 +558,10 @@ static int connect_to_sock(struct connection *con)
O_NONBLOCK);
if (result == -EINPROGRESS)
result = 0;
if (result != 0)
goto out_err;
out:
up_write(&con->sock_sem);
/*
* Returning an error here means we've given up trying to connect to
* a remote node, otherwise we return 0 and reschedule the connetion
* attempt
*/
return result;
if (result == 0)
goto out;
out_err:
out_err:
if (con->sock) {
sock_release(con->sock);
con->sock = NULL;
......@@ -604,10 +576,13 @@ static int connect_to_sock(struct connection *con)
lowcomms_connect_sock(con);
result = 0;
}
goto out;
out:
up_write(&con->sock_sem);
return;
}
static struct socket *create_listen_sock(struct connection *con, struct sockaddr_storage *saddr)
static struct socket *create_listen_sock(struct connection *con,
struct sockaddr_storage *saddr)
{
struct socket *sock = NULL;
mm_segment_t fs;
......@@ -629,10 +604,12 @@ static struct socket *create_listen_sock(struct connection *con, struct sockaddr
fs = get_fs();
set_fs(get_ds());
result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one));
result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
(char *)&one, sizeof(one));
set_fs(fs);
if (result < 0) {
printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",result);
printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",
result);
}
sock->sk->sk_user_data = con;
con->rx_action = accept_from_sock;
......@@ -652,7 +629,8 @@ static struct socket *create_listen_sock(struct connection *con, struct sockaddr
fs = get_fs();
set_fs(get_ds());
result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&one, sizeof(one));
result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
(char *)&one, sizeof(one));
set_fs(fs);
if (result < 0) {
printk("dlm: Set keepalive failed: %d\n", result);
......@@ -666,7 +644,7 @@ static struct socket *create_listen_sock(struct connection *con, struct sockaddr
goto create_out;
}
create_out:
create_out:
return sock;
}
......@@ -679,10 +657,6 @@ static int listen_for_all(void)
int result = -EINVAL;
/* We don't support multi-homed hosts */
memset(con, 0, sizeof(*con));
init_rwsem(&con->sock_sem);
spin_lock_init(&con->writequeue_lock);
INIT_LIST_HEAD(&con->writequeue);
set_bit(CF_IS_OTHERCON, &con->flags);
sock = create_listen_sock(con, &dlm_local_addr);
......@@ -731,16 +705,12 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
int offset = 0;
int users = 0;
if (!atomic_read(&accepting))
return NULL;
con = nodeid2con(nodeid, allocation);
if (!con)
return NULL;
spin_lock(&con->writequeue_lock);
e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
if (((struct list_head *) e == &con->writequeue) ||
if ((&e->list == &con->writequeue) ||
(PAGE_CACHE_SIZE - e->end < len)) {
e = NULL;
} else {
......@@ -777,10 +747,6 @@ void dlm_lowcomms_commit_buffer(void *mh)
struct connection *con = e->con;
int users;
if (!atomic_read(&accepting))
return;
spin_lock(&con->writequeue_lock);
users = --e->users;
if (users)
goto out;
......@@ -797,7 +763,7 @@ void dlm_lowcomms_commit_buffer(void *mh)
}
return;
out:
out:
spin_unlock(&con->writequeue_lock);
return;
}
......@@ -809,7 +775,7 @@ static void free_entry(struct writequeue_entry *e)
}
/* Send a message */
static int send_to_sock(struct connection *con)
static void send_to_sock(struct connection *con)
{
int ret = 0;
ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int);
......@@ -846,7 +812,7 @@ static int send_to_sock(struct connection *con)
}
else {
/* Don't starve people filling buffers */
schedule();
cond_resched();
}
spin_lock(&con->writequeue_lock);
......@@ -855,25 +821,26 @@ static int send_to_sock(struct connection *con)
if (e->len == 0 && e->users == 0) {
list_del(&e->list);
kunmap(e->page);
free_entry(e);
continue;
}
}
spin_unlock(&con->writequeue_lock);
out:
out:
up_read(&con->sock_sem);
return ret;
return;
send_error:
send_error:
up_read(&con->sock_sem);
close_connection(con, FALSE);
close_connection(con, false);
lowcomms_connect_sock(con);
return ret;
return;
out_connect:
out_connect:
up_read(&con->sock_sem);
lowcomms_connect_sock(con);
return 0;
return;
}
static void clean_one_writequeue(struct connection *con)
......@@ -904,12 +871,12 @@ int dlm_lowcomms_close(int nodeid)
con = nodeid2con(nodeid, 0);
if (con) {
clean_one_writequeue(con);
close_connection(con, TRUE);
close_connection(con, true);
atomic_set(&con->waiting_requests, 0);
}
return 0;
out:
out:
return -1;
}
......@@ -959,7 +926,7 @@ static void process_sockets(void)
/* Don't starve out everyone else */
if (++count >= MAX_RX_MSG_COUNT) {
schedule();
cond_resched();
count = 0;
}
......@@ -977,7 +944,6 @@ static void process_output_queue(void)
{
struct list_head *list;
struct list_head *temp;
int ret;
spin_lock_bh(&write_sockets_lock);
list_for_each_safe(list, temp, &write_sockets) {
......@@ -987,10 +953,7 @@ static void process_output_queue(void)
list_del(&con->write_list);
spin_unlock_bh(&write_sockets_lock);
ret = send_to_sock(con);
if (ret < 0) {
}
send_to_sock(con);
spin_lock_bh(&write_sockets_lock);
}
spin_unlock_bh(&write_sockets_lock);
......@@ -1000,7 +963,6 @@ static void process_state_queue(void)
{
struct list_head *list;
struct list_head *temp;
int ret;
spin_lock_bh(&state_sockets_lock);
list_for_each_safe(list, temp, &state_sockets) {
......@@ -1010,9 +972,7 @@ static void process_state_queue(void)
clear_bit(CF_CONNECT_PENDING, &con->flags);
spin_unlock_bh(&state_sockets_lock);
ret = connect_to_sock(con);
if (ret < 0) {
}
connect_to_sock(con);
spin_lock_bh(&state_sockets_lock);
}
spin_unlock_bh(&state_sockets_lock);
......@@ -1046,14 +1006,13 @@ static int read_list_empty(void)
/* DLM Transport comms receive daemon */
static int dlm_recvd(void *data)
{
init_waitqueue_head(&lowcomms_recv_waitq);
init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
if (read_list_empty())
schedule();
cond_resched();
set_current_state(TASK_RUNNING);
process_sockets();
......@@ -1081,14 +1040,13 @@ static int write_and_state_lists_empty(void)
/* DLM Transport send daemon */
static int dlm_sendd(void *data)
{
init_waitqueue_head(&lowcomms_send_waitq);
init_waitqueue_entry(&lowcomms_send_waitq_head, current);
add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
if (write_and_state_lists_empty())
schedule();
cond_resched();
set_current_state(TASK_RUNNING);
process_state_queue();
......@@ -1141,21 +1099,20 @@ void dlm_lowcomms_stop(void)
{
int i;
atomic_set(&accepting, 0);
/* Set all the activity flags to prevent any
/* Set all the flags to prevent any
socket activity.
*/
for (i = 0; i < conn_array_size; i++) {
if (connections[i])
connections[i]->flags |= 0x7;
connections[i]->flags |= 0xFF;
}
daemons_stop();
clean_writequeues();
for (i = 0; i < conn_array_size; i++) {
if (connections[i]) {
close_connection(connections[i], TRUE);
close_connection(connections[i], true);
if (connections[i]->othercon)
kmem_cache_free(con_cache, connections[i]->othercon);
kmem_cache_free(con_cache, connections[i]);
......@@ -1173,24 +1130,12 @@ int dlm_lowcomms_start(void)
{
int error = 0;
error = -ENOTCONN;
/*
* Temporarily initialise the waitq head so that lowcomms_send_message
* doesn't crash if it gets called before the thread is fully
* initialised
*/
init_waitqueue_head(&lowcomms_send_waitq);
error = -ENOMEM;
connections = kmalloc(sizeof(struct connection *) *
connections = kzalloc(sizeof(struct connection *) *
NODE_INCREMENT, GFP_KERNEL);
if (!connections)
goto out;
memset(connections, 0,
sizeof(struct connection *) * NODE_INCREMENT);
conn_array_size = NODE_INCREMENT;
if (dlm_our_addr(&dlm_local_addr, 0)) {
......@@ -1203,7 +1148,8 @@ int dlm_lowcomms_start(void)
}
con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection),
__alignof__(struct connection), 0, NULL, NULL);
__alignof__(struct connection), 0,
NULL, NULL);
if (!con_cache)
goto fail_free_conn;
......@@ -1217,40 +1163,20 @@ int dlm_lowcomms_start(void)
if (error)
goto fail_unlisten;
atomic_set(&accepting, 1);
return 0;
fail_unlisten:
close_connection(connections[0], 0);
fail_unlisten:
close_connection(connections[0], false);
kmem_cache_free(con_cache, connections[0]);
kmem_cache_destroy(con_cache);
fail_free_conn:
fail_free_conn:
kfree(connections);
out:
out:
return error;
}
int dlm_lowcomms_init(void)
{
INIT_LIST_HEAD(&read_sockets);
INIT_LIST_HEAD(&write_sockets);
INIT_LIST_HEAD(&state_sockets);
spin_lock_init(&read_sockets_lock);
spin_lock_init(&write_sockets_lock);
spin_lock_init(&state_sockets_lock);
init_MUTEX(&connections_lock);
return 0;
}
void dlm_lowcomms_exit(void)
{
}
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically
......
......@@ -14,8 +14,6 @@
#ifndef __LOWCOMMS_DOT_H__
#define __LOWCOMMS_DOT_H__
int dlm_lowcomms_init(void);
void dlm_lowcomms_exit(void);
int dlm_lowcomms_start(void);
void dlm_lowcomms_stop(void);
int dlm_lowcomms_close(int nodeid);
......
......@@ -16,7 +16,6 @@
#include "lock.h"
#include "user.h"
#include "memory.h"
#include "lowcomms.h"
#include "config.h"
#ifdef CONFIG_DLM_DEBUG
......@@ -47,20 +46,14 @@ static int __init init_dlm(void)
if (error)
goto out_config;
error = dlm_lowcomms_init();
if (error)
goto out_debug;
error = dlm_user_init();
if (error)
goto out_lowcomms;
goto out_debug;
printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
return 0;
out_lowcomms:
dlm_lowcomms_exit();
out_debug:
dlm_unregister_debugfs();
out_config:
......@@ -76,7 +69,6 @@ static int __init init_dlm(void)
static void __exit exit_dlm(void)
{
dlm_user_exit();
dlm_lowcomms_exit();
dlm_config_exit();
dlm_memory_exit();
dlm_lockspace_exit();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册