RHEL6: NFSv3 client hung, NFS state shows TCP connection up, and one or more NFS ops retransmitted over and over

Solution In Progress - Updated -

Environment

  • Red Hat Enterprise Linux 6 (NFS client)
    • seen on kernel-2.6.32-358.18.1.el6
  • NFSv3

Issue

  • NFS client system is hung and one or more NFS3 operations are being transmitted over and over

Resolution

  • Contact NFS server vendor for further troubleshooting.

Root Cause

  • NFS server appears to be repeatedly returning a retryable error to the NFS client operations. The NFS client re-sends these NFS operations only to get another retryable error and this continues on indefinitely.

Diagnostic Steps

Analysis with a tcpdump

  • Obtain a tcpdump at the time of the hung system.
    • Inside the tcpdump, look for the same NFSv3 operation being sent, and completing with a retryable error code.

Analysis with a vmcore

  • Configure kdump and gather a vmcore.
  • Inside the vmcore, look at the oldest blocked tasks, and the NFS state.
    • The oldest blocked tasks will somehow be blocked on NFS, whether directly or indirectly.
    • The NFS state inside the vmcore will show that the transport (TCP) is TCP_CONNECTED, and there are one or more NFS operations which have been retransitted over and over (rq_ntrans very high), but never had a major timeout (tk_timeouts == 0). This indicates the NFS server is responding to each request, but with a non-success status and a retryable error code.
  • Detailed vmcore analysis: oldest blocked task analysis
crash> ps -m | grep UN
[0 00:00:00.000] [UN]  PID: 19056  TASK: ffff8808063c9500  CPU: 2   COMMAND: "mount.nfs"
[0 00:00:00.000] [UN]  PID: 19169  TASK: ffff880806d90ae0  CPU: 6   COMMAND: "df"
[0 00:00:00.000] [UN]  PID: 9029   TASK: ffff880830072040  CPU: 3   COMMAND: "sh"
[0 00:00:19.042] [UN]  PID: 19195  TASK: ffff88081a2c2080  CPU: 3   COMMAND: "mount.nfs"
[0 04:56:18.403] [UN]  PID: 9030   TASK: ffff88081e2b8040  CPU: 5   COMMAND: "sh"
crash> bt 9030
PID: 9030   TASK: ffff88081e2b8040  CPU: 5   COMMAND: "sh"
 #0 [ffff88082fadbc78] schedule at ffffffff8150e0e2
 #1 [ffff88082fadbd40] __mutex_lock_slowpath at ffffffff8150f78e
 #2 [ffff88082fadbdb0] mutex_lock at ffffffff8150f62b
 #3 [ffff88082fadbdd0] do_filp_open at ffffffff81192e67
 #4 [ffff88082fadbf20] do_sys_open at ffffffff8117e249
 #5 [ffff88082fadbf70] sys_open at ffffffff8117e360
 #6 [ffff88082fadbf80] system_call_fastpath at ffffffff8100b072
crash> bt -f 9030| grep "mutex_lock at"
 #2 [ffff88082fadbdb0] mutex_lock at ffffffff8150f62b
crash> px (0xffff88082fadbdb0 - 6*8)
$1 = 0xffff88082fadbd80
crash> rd 0xffff88082fadbd80
ffff88082fadbd80:  ffff880814e5bbb0                    ........
crash> px ((struct mutex *)0xffff880814e5bbb0)->owner->task
$2 = (struct task_struct *) 0xffff880830072040
crash> bt 0xffff880830072040
PID: 9029   TASK: ffff880830072040  CPU: 3   COMMAND: "sh"
 #0 [ffff88082e147978] schedule at ffffffff8150e0e2
 #1 [ffff88082e147a40] rpc_wait_bit_killable at ffffffffa02774e2 [sunrpc]
 #2 [ffff88082e147a50] __wait_on_bit at ffffffff8150f27f
 #3 [ffff88082e147aa0] out_of_line_wait_on_bit at ffffffff8150f328
 #4 [ffff88082e147b10] __rpc_execute at ffffffffa0277a25 [sunrpc]
 #5 [ffff88082e147b80] rpc_execute at ffffffffa0277ce1 [sunrpc]
 #6 [ffff88082e147bb0] rpc_run_task at ffffffffa026e3a5 [sunrpc]
 #7 [ffff88082e147bd0] rpc_call_sync at ffffffffa026e4c2 [sunrpc]
 #8 [ffff88082e147c30] nfs3_rpc_wrapper.clone.0 at ffffffffa03d7f6d [nfs]
 #9 [ffff88082e147c70] nfs3_proc_lookup at ffffffffa03d81e6 [nfs]
#10 [ffff88082e147d10] nfs_lookup at ffffffffa03bf7ff [nfs]
#11 [ffff88082e147d60] __lookup_hash at ffffffff8118e4c2
#12 [ffff88082e147db0] lookup_hash at ffffffff8118eb6a
#13 [ffff88082e147dd0] do_filp_open at ffffffff81192e6f
#14 [ffff88082e147f20] do_sys_open at ffffffff8117e249
#15 [ffff88082e147f70] sys_open at ffffffff8117e360

crash> print __rpc_execute
$3 = {void (struct rpc_task *)} 0xffffffffa0277930 <__rpc_execute>
crash> dis -r ffffffffa0277ce1 | tail -3
0xffffffffa0277cd9 <rpc_execute+89>:    mov    %rbx,%rdi
0xffffffffa0277cdc <rpc_execute+92>:    callq  0xffffffffa0277930 <__rpc_execute>
0xffffffffa0277ce1 <rpc_execute+97>:    mov    -0x18(%rbp),%rbx
crash> dis __rpc_execute | head
0xffffffffa0277930 <__rpc_execute>:     push   %rbp
0xffffffffa0277931 <__rpc_execute+1>:   mov    %rsp,%rbp
0xffffffffa0277934 <__rpc_execute+4>:   push   %r15
0xffffffffa0277936 <__rpc_execute+6>:   push   %r14
0xffffffffa0277938 <__rpc_execute+8>:   push   %r13
0xffffffffa027793a <__rpc_execute+10>:  push   %r12
0xffffffffa027793c <__rpc_execute+12>:  push   %rbx
0xffffffffa027793d <__rpc_execute+13>:  sub    $0x38,%rsp
0xffffffffa0277941 <__rpc_execute+17>:  nopl   0x0(%rax,%rax,1)
0xffffffffa0277946 <__rpc_execute+22>:  movzwl 0xd0(%rdi),%edx
crash> bt -f 9029 | grep " rpc_execute at"
 #5 [ffff88082e147b80] rpc_execute at ffffffffa0277ce1 [sunrpc]
crash> px (0xffff88082e147b80 - 6*8)
$4 = 0xffff88082e147b50
crash> rd 0xffff88082e147b50
ffff88082e147b50:  ffff880826a380c0                    ...&....
crash> kmem 0xffff880826a380c0
CACHE            NAME                 OBJSIZE  ALLOCATED     TOTAL  SLABS  SSIZE
ffff880825dd2ec0 rpc_tasks                256         12        60      4     4k
SLAB              MEMORY            TOTAL  ALLOCATED  FREE
ffff880826a38040  ffff880826a380c0     15          2    13
FREE / [ALLOCATED]
  [ffff880826a380c0]

      PAGE        PHYSICAL      MAPPING       INDEX CNT FLAGS
ffffea001c873c40 826a38000                0 ffff88082e0ad9c0  1 40000000000080 slab
crash> p (struct rpc_task *)0xffff880826a380c0
$8 = (struct rpc_task *) 0xffff880826a380c0
crash> p $8->tk_msg.rpc_proc->p_name
$13 = 0xffffffffa0403f78 "LOOKUP"
crash> p $8->tk_timeouts
$14 = 0
crash> p $8->tk_rqstp->rq_ntrans
$15 = 5326663
  • Now from the rpc_task, look at the transport protocol, and the transport state.
crash> p $8->tk_rqstp->rq_xprt
$17 = (struct rpc_xprt *) 0xffff880832bb1000
crash> p ((struct sock_xprt *)0xffff880832bb1000)->inet
$18 = (struct sock *) 0xffff880807d098c0
crash> p $8->tk_rqstp->rq_xprt->prot
$21 = 6
crash> p $18->__sk_common.skc_state
$22 = 1 '\001'
crash> p (struct tcp_sock *)0xffff880807d098c0
$19 = (struct tcp_sock *) 0xffff880807d098c0
  • Note the above 'prot' value of '6' is 'TCP' and is from include/linux/in.h
* Standard well-defined IP protocols.  */
enum {
  IPPROTO_IP = 0,               /* Dummy protocol for TCP               */
  IPPROTO_ICMP = 1,             /* Internet Control Message Protocol    */
  IPPROTO_IGMP = 2,             /* Internet Group Management Protocol   */
  IPPROTO_IPIP = 4,             /* IPIP tunnels (older KA9Q tunnels use 94) */
  IPPROTO_TCP = 6,              /* Transmission Control Protocol        */
  IPPROTO_EGP = 8,              /* Exterior Gateway Protocol            */
  IPPROTO_PUP = 12,             /* PUP protocol                         */
  IPPROTO_UDP = 17,             /* User Datagram Protocol               */
  IPPROTO_IDP = 22,             /* XNS IDP protocol                     */
  IPPROTO_DCCP = 33,            /* Datagram Congestion Control Protocol */
  IPPROTO_RSVP = 46,            /* RSVP protocol                        */
  IPPROTO_GRE = 47,             /* Cisco GRE tunnels (rfc 1701,1702)    */

  IPPROTO_IPV6   = 41,          /* IPv6-in-IPv4 tunnelling              */

  IPPROTO_ESP = 50,            /* Encapsulation Security Payload protocol */
  IPPROTO_AH = 51,             /* Authentication Header protocol       */
  IPPROTO_BEETPH = 94,         /* IP option pseudo header for BEET */
  IPPROTO_PIM    = 103,         /* Protocol Independent Multicast       */

  IPPROTO_COMP   = 108,                /* Compression Header protocol */
  IPPROTO_SCTP   = 132,         /* Stream Control Transport Protocol    */
  IPPROTO_UDPLITE = 136,        /* UDP-Lite (RFC 3828)                  */

  IPPROTO_RAW    = 255,         /* Raw IP packets                       */
  IPPROTO_MAX
};
  • Note the above skc_state value is from the TCP state enum in include/net/tcp_states.h. A value of '1' above means the transport is in a connected state: TCP_ESTABLISHED.
enum {
        TCP_ESTABLISHED = 1,
        TCP_SYN_SENT,
        TCP_SYN_RECV,
        TCP_FIN_WAIT1,
        TCP_FIN_WAIT2,
        TCP_TIME_WAIT,
        TCP_CLOSE,
        TCP_CLOSE_WAIT,
        TCP_LAST_ACK,
        TCP_LISTEN,
        TCP_CLOSING,    /* Now a valid state */

        TCP_MAX_STATES  /* Leave at the end! */
};

This solution is part of Red Hat’s fast-track publication program, providing a huge library of solutions that Red Hat engineers have created while supporting our customers. To give you the knowledge you need the instant it becomes available, these articles may be presented in a raw and unedited form.