summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Zhen Wei <zwei@novell.com> 2007-01-23 17:19:59 -0800
committer: Mark Fasheh <mark.fasheh@oracle.com> 2007-02-07 12:15:11 -0800
commit: 925037bcba7691db2403684141a276930ad184f3 (patch)
tree: 5a928f3d3f8488d1094a4ced8f39228c9d5a8ca9
parent: f71aa8a55a0ae1a0d06c6079265d16502a678e8e (diff)
downloadlinux-3.10-925037bcba7691db2403684141a276930ad184f3.tar.gz
linux-3.10-925037bcba7691db2403684141a276930ad184f3.tar.bz2
linux-3.10-925037bcba7691db2403684141a276930ad184f3.zip
ocfs2: introduce sc->sc_send_lock to protect outbound messages
When there is a lot of multithreaded I/O usage, two threads can collide while sending out a message to the other nodes. This is due to the lack of locking between threads while sending out the messages. When a connected TCP send(), sendto(), or sendmsg() arrives in the Linux kernel, it eventually comes through tcp_sendmsg(). tcp_sendmsg() protects itself by acquiring a lock at invocation by calling lock_sock(). tcp_sendmsg() then loops over the buffers in the iovec, allocating associated sk_buff's and cache pages for use in the actual send. As it does so, it pushes the data out to TCP for actual transmission. However, if one of those allocations fails (because a large number of large sends is being processed, for example), it must wait for memory to become available. It does so by jumping to wait_for_sndbuf or wait_for_memory, both of which eventually cause a call to sk_stream_wait_memory(). sk_stream_wait_memory() contains a code path that calls sk_wait_event(). Finally, sk_wait_event() contains the call to release_sock(). Once the socket lock has been dropped in the middle of a send, a second thread can enter tcp_sendmsg() on the same socket and interleave its data with the first thread's partially sent message. The following patch adds a lock to the socket container in order to properly serialize outbound requests. From: Zhen Wei <zwei@novell.com> Acked-by: Jeff Mahoney <jeffm@suse.com> Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r-- fs/ocfs2/cluster/tcp.c 8
-rw-r--r-- fs/ocfs2/cluster/tcp_internal.h 2
2 files changed, 10 insertions, 0 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2021aec7cbb..1718215fc01 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -556,6 +556,8 @@ static void o2net_register_callbacks(struct sock *sk,
sk->sk_data_ready = o2net_data_ready;
sk->sk_state_change = o2net_state_change;
+ mutex_init(&sc->sc_send_lock);
+
write_unlock_bh(&sk->sk_callback_lock);
}
@@ -858,10 +860,12 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
ssize_t ret;
+ mutex_lock(&sc->sc_send_lock);
ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
virt_to_page(kmalloced_virt),
(long)kmalloced_virt & ~PAGE_MASK,
size, MSG_DONTWAIT);
+ mutex_unlock(&sc->sc_send_lock);
if (ret != size) {
mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
" failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
@@ -976,8 +980,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
/* finally, convert the message header to network byte-order
* and send */
+ mutex_lock(&sc->sc_send_lock);
ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen,
sizeof(struct o2net_msg) + caller_bytes);
+ mutex_unlock(&sc->sc_send_lock);
msglog(msg, "sending returned %d\n", ret);
if (ret < 0) {
mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret);
@@ -1109,8 +1115,10 @@ static int o2net_process_message(struct o2net_sock_container *sc,
out_respond:
/* this destroys the hdr, so don't use it after this */
+ mutex_lock(&sc->sc_send_lock);
ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr,
handler_status);
+ mutex_unlock(&sc->sc_send_lock);
hdr = NULL;
mlog(0, "sending handler status %d, syserr %d returned %d\n",
handler_status, syserr, ret);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 177927a8f00..4dae5df5e46 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -155,6 +155,8 @@ struct o2net_sock_container {
struct timeval sc_tv_func_stop;
u32 sc_msg_key;
u16 sc_msg_type;
+
+ struct mutex sc_send_lock;
};
struct o2net_msg_handler {