RHEL6: NFSv3 client hung, NFS state shows TCP connection up, and one or more NFS ops retransmitted over and over
Environment
- Red Hat Enterprise Linux 6 (NFS client)
- seen on kernel-2.6.32-358.18.1.el6
- NFSv3
Issue
- NFS client system is hung and one or more NFS3 operations are being transmitted over and over
Resolution
- Contact NFS server vendor for further troubleshooting.
Root Cause
- NFS server appears to be repeatedly returning a retryable error to the NFS client operations. The NFS client re-sends these NFS operations only to get another retryable error and this continues on indefinitely.
Diagnostic Steps
Analysis with a tcpdump
- Obtain a tcpdump at the time of the hung system.
- Inside the tcpdump, look for the same NFSv3 operation being sent, and completing with a retryable error code.
Analysis with a vmcore
- Configure kdump and gather a vmcore.
- Inside the vmcore, look at the oldest blocked tasks, and the NFS state.
- The oldest blocked tasks will be blocked on NFS, whether directly or indirectly (for example, waiting on a mutex held by another task that is itself blocked in an NFS RPC).
- The NFS state inside the vmcore will show that the transport (TCP) is TCP_CONNECTED, and there are one or more NFS operations which have been retransmitted over and over (rq_ntrans very high), but never had a major timeout (tk_timeouts == 0). This indicates the NFS server is responding to each request, but with a non-success status and a retryable error code.
- Detailed vmcore analysis: oldest blocked task analysis
crash> ps -m | grep UN
[0 00:00:00.000] [UN] PID: 19056 TASK: ffff8808063c9500 CPU: 2 COMMAND: "mount.nfs"
[0 00:00:00.000] [UN] PID: 19169 TASK: ffff880806d90ae0 CPU: 6 COMMAND: "df"
[0 00:00:00.000] [UN] PID: 9029 TASK: ffff880830072040 CPU: 3 COMMAND: "sh"
[0 00:00:19.042] [UN] PID: 19195 TASK: ffff88081a2c2080 CPU: 3 COMMAND: "mount.nfs"
[0 04:56:18.403] [UN] PID: 9030 TASK: ffff88081e2b8040 CPU: 5 COMMAND: "sh"
crash> bt 9030
PID: 9030 TASK: ffff88081e2b8040 CPU: 5 COMMAND: "sh"
#0 [ffff88082fadbc78] schedule at ffffffff8150e0e2
#1 [ffff88082fadbd40] __mutex_lock_slowpath at ffffffff8150f78e
#2 [ffff88082fadbdb0] mutex_lock at ffffffff8150f62b
#3 [ffff88082fadbdd0] do_filp_open at ffffffff81192e67
#4 [ffff88082fadbf20] do_sys_open at ffffffff8117e249
#5 [ffff88082fadbf70] sys_open at ffffffff8117e360
#6 [ffff88082fadbf80] system_call_fastpath at ffffffff8100b072
crash> bt -f 9030| grep "mutex_lock at"
#2 [ffff88082fadbdb0] mutex_lock at ffffffff8150f62b
crash> px (0xffff88082fadbdb0 - 6*8)
$1 = 0xffff88082fadbd80
crash> rd 0xffff88082fadbd80
ffff88082fadbd80: ffff880814e5bbb0 ........
crash> px ((struct mutex *)0xffff880814e5bbb0)->owner->task
$2 = (struct task_struct *) 0xffff880830072040
crash> bt 0xffff880830072040
PID: 9029 TASK: ffff880830072040 CPU: 3 COMMAND: "sh"
#0 [ffff88082e147978] schedule at ffffffff8150e0e2
#1 [ffff88082e147a40] rpc_wait_bit_killable at ffffffffa02774e2 [sunrpc]
#2 [ffff88082e147a50] __wait_on_bit at ffffffff8150f27f
#3 [ffff88082e147aa0] out_of_line_wait_on_bit at ffffffff8150f328
#4 [ffff88082e147b10] __rpc_execute at ffffffffa0277a25 [sunrpc]
#5 [ffff88082e147b80] rpc_execute at ffffffffa0277ce1 [sunrpc]
#6 [ffff88082e147bb0] rpc_run_task at ffffffffa026e3a5 [sunrpc]
#7 [ffff88082e147bd0] rpc_call_sync at ffffffffa026e4c2 [sunrpc]
#8 [ffff88082e147c30] nfs3_rpc_wrapper.clone.0 at ffffffffa03d7f6d [nfs]
#9 [ffff88082e147c70] nfs3_proc_lookup at ffffffffa03d81e6 [nfs]
#10 [ffff88082e147d10] nfs_lookup at ffffffffa03bf7ff [nfs]
#11 [ffff88082e147d60] __lookup_hash at ffffffff8118e4c2
#12 [ffff88082e147db0] lookup_hash at ffffffff8118eb6a
#13 [ffff88082e147dd0] do_filp_open at ffffffff81192e6f
#14 [ffff88082e147f20] do_sys_open at ffffffff8117e249
#15 [ffff88082e147f70] sys_open at ffffffff8117e360
crash> print __rpc_execute
$3 = {void (struct rpc_task *)} 0xffffffffa0277930 <__rpc_execute>
crash> dis -r ffffffffa0277ce1 | tail -3
0xffffffffa0277cd9 <rpc_execute+89>: mov %rbx,%rdi
0xffffffffa0277cdc <rpc_execute+92>: callq 0xffffffffa0277930 <__rpc_execute>
0xffffffffa0277ce1 <rpc_execute+97>: mov -0x18(%rbp),%rbx
crash> dis __rpc_execute | head
0xffffffffa0277930 <__rpc_execute>: push %rbp
0xffffffffa0277931 <__rpc_execute+1>: mov %rsp,%rbp
0xffffffffa0277934 <__rpc_execute+4>: push %r15
0xffffffffa0277936 <__rpc_execute+6>: push %r14
0xffffffffa0277938 <__rpc_execute+8>: push %r13
0xffffffffa027793a <__rpc_execute+10>: push %r12
0xffffffffa027793c <__rpc_execute+12>: push %rbx
0xffffffffa027793d <__rpc_execute+13>: sub $0x38,%rsp
0xffffffffa0277941 <__rpc_execute+17>: nopl 0x0(%rax,%rax,1)
0xffffffffa0277946 <__rpc_execute+22>: movzwl 0xd0(%rdi),%edx
crash> bt -f 9029 | grep " rpc_execute at"
#5 [ffff88082e147b80] rpc_execute at ffffffffa0277ce1 [sunrpc]
crash> px (0xffff88082e147b80 - 6*8)
$4 = 0xffff88082e147b50
crash> rd 0xffff88082e147b50
ffff88082e147b50: ffff880826a380c0 ...&....
crash> kmem 0xffff880826a380c0
CACHE NAME OBJSIZE ALLOCATED TOTAL SLABS SSIZE
ffff880825dd2ec0 rpc_tasks 256 12 60 4 4k
SLAB MEMORY TOTAL ALLOCATED FREE
ffff880826a38040 ffff880826a380c0 15 2 13
FREE / [ALLOCATED]
[ffff880826a380c0]
PAGE PHYSICAL MAPPING INDEX CNT FLAGS
ffffea001c873c40 826a38000 0 ffff88082e0ad9c0 1 40000000000080 slab
crash> p (struct rpc_task *)0xffff880826a380c0
$8 = (struct rpc_task *) 0xffff880826a380c0
crash> p $8->tk_msg.rpc_proc->p_name
$13 = 0xffffffffa0403f78 "LOOKUP"
crash> p $8->tk_timeouts
$14 = 0
crash> p $8->tk_rqstp->rq_ntrans
$15 = 5326663
- Now from the rpc_task, look at the transport protocol, and the transport state.
crash> p $8->tk_rqstp->rq_xprt
$17 = (struct rpc_xprt *) 0xffff880832bb1000
crash> p ((struct sock_xprt *)0xffff880832bb1000)->inet
$18 = (struct sock *) 0xffff880807d098c0
crash> p $8->tk_rqstp->rq_xprt->prot
$21 = 6
crash> p $18->__sk_common.skc_state
$22 = 1 '\001'
crash> p (struct tcp_sock *)0xffff880807d098c0
$19 = (struct tcp_sock *) 0xffff880807d098c0
- Note the above 'prot' value of '6' is 'TCP' and is from include/linux/in.h
/* Standard well-defined IP protocols. */
enum {
IPPROTO_IP = 0, /* Dummy protocol for TCP */
IPPROTO_ICMP = 1, /* Internet Control Message Protocol */
IPPROTO_IGMP = 2, /* Internet Group Management Protocol */
IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */
IPPROTO_TCP = 6, /* Transmission Control Protocol */
IPPROTO_EGP = 8, /* Exterior Gateway Protocol */
IPPROTO_PUP = 12, /* PUP protocol */
IPPROTO_UDP = 17, /* User Datagram Protocol */
IPPROTO_IDP = 22, /* XNS IDP protocol */
IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
IPPROTO_RSVP = 46, /* RSVP protocol */
IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */
IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
IPPROTO_AH = 51, /* Authentication Header protocol */
IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */
IPPROTO_PIM = 103, /* Protocol Independent Multicast */
IPPROTO_COMP = 108, /* Compression Header protocol */
IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */
IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */
IPPROTO_RAW = 255, /* Raw IP packets */
IPPROTO_MAX
};
- Note the above 'skc_state' value is from the TCP state enum in include/net/tcp_states.h. A value of '1' above means the transport is in a connected state: TCP_ESTABLISHED.
enum {
TCP_ESTABLISHED = 1,
TCP_SYN_SENT,
TCP_SYN_RECV,
TCP_FIN_WAIT1,
TCP_FIN_WAIT2,
TCP_TIME_WAIT,
TCP_CLOSE,
TCP_CLOSE_WAIT,
TCP_LAST_ACK,
TCP_LISTEN,
TCP_CLOSING, /* Now a valid state */
TCP_MAX_STATES /* Leave at the end! */
};
This solution is part of Red Hat’s fast-track publication program, providing a huge library of solutions that Red Hat engineers have created while supporting our customers. To give you the knowledge you need the instant it becomes available, these articles may be presented in a raw and unedited form.
Comments