An NFS server crashes due to a general protection fault in nfsd4_cb_done().
Issue
-
An NFS server crashes due to a general protection fault in nfsd4_cb_done().
-
The panic message in the kernel ring buffer indicates that the kernel crashed with a general protection fault.
crash> log
...
[662118.960842] general protection fault: 0000 [#1] SMP
[662118.960929] Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache binfmt_misc ip6t_rpfilter ipt_REJECT nf_reject_ipv4 ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter vmw_vsock_vmci_transport vsock ext4 mbcache jbd2 sb_edac edac_core coretemp iosf_mbi crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd ppdev vmw_balloon joydev pcspkr sg vmw_vmci shpchp i2c_piix4 parport_pc parport nfsd auth_rpcgss nfs_acl lockd grace
[662118.961445] sunrpc ip_tables xfs libcrc32c sr_mod cdrom ata_generic pata_acpi vmwgfx drm_kms_helper sd_mod crc_t10dif crct10dif_generic syscopyarea sysfillrect sysimgblt fb_sys_fops ttm crct10dif_pclmul crct10dif_common ahci libahci ata_piix crc32c_intel serio_raw drm libata vmxnet3 vmw_pvscsi i2c_core dm_mirror dm_region_hash dm_log dm_mod
[662118.961575] CPU: 2 PID: 915 Comm: kworker/2:1 Not tainted 3.10.0-693.1.1.el7.x86_64 #1
[662118.961602] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/21/2015
[662118.961655] Workqueue: rpciod rpc_async_schedule [sunrpc]
[662118.961676] task: ffff880231e52f70 ti: ffff88003e144000 task.ti: ffff88003e144000
[662118.961701] RIP: 0010:[<ffffffffc03bff4b>] [<ffffffffc03bff4b>] nfsd4_cb_done+0x2b/0x310 [nfsd]
[662118.961747] RSP: 0018:ffff88003e147d60 EFLAGS: 00010246
[662118.961765] RAX: ffffffffc03bff20 RBX: ffff880014635bb8 RCX: dead000000000200
[662118.961800] RDX: 0000000000000001 RSI: ffff880014635bb8 RDI: ffff88005299eb00
[662118.961824] RBP: ffff88003e147d88 R08: ffff88005299eb48 R09: dffe3d98a599eb40
[662118.961861] R10: dffe3d98a599eb40 R11: ffffea00003e4400 R12: ffff88005299eb00
[662118.961885] R13: bfe58c81e3a881e3 R14: bfe58c81e3a881e3 R15: 0000000000000080
[662118.961909] FS: 0000000000000000(0000) GS:ffff88023fc80000(0000) knlGS:0000000000000000
[662118.961936] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[662118.961955] CR2: 00007f8e5306a000 CR3: 000000022b6ef000 CR4: 00000000001407e0
[662118.962011] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[662118.962061] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[662118.962085] Stack:
[662118.962094] ffff88005299eb00 ffff88001bc11c00 ffffffffc03469c0 ffffffffc03469c0
[662118.962123] 0000000000000080 ffff88003e147da0 ffffffffc03469ee ffff88005299eb00
[662118.962151] ffff88003e147e08 ffffffffc0348007 0000000000010000 ffff88005299eb30
[662118.962179] Call Trace:
[662118.962203] [<ffffffffc03469c0>] ? rpc_destroy_wait_queue+0x20/0x20 [sunrpc]
[662118.962234] [<ffffffffc03469c0>] ? rpc_destroy_wait_queue+0x20/0x20 [sunrpc]
[662118.962264] [<ffffffffc03469ee>] rpc_exit_task+0x2e/0x90 [sunrpc]
[662118.962292] [<ffffffffc0348007>] __rpc_execute+0x97/0x410 [sunrpc]
[662118.962319] [<ffffffffc0348392>] rpc_async_schedule+0x12/0x20 [sunrpc]
[662118.962344] [<ffffffff810a881a>] process_one_work+0x17a/0x440
[662118.962366] [<ffffffff810a94e6>] worker_thread+0x126/0x3c0
[662118.962386] [<ffffffff810a93c0>] ? manage_workers.isra.24+0x2a0/0x2a0
[662118.963408] [<ffffffff810b098f>] kthread+0xcf/0xe0
[662118.964427] [<ffffffff8108ddeb>] ? do_exit+0x6bb/0xa40
[662118.965340] [<ffffffff810b08c0>] ? insert_kthread_work+0x40/0x40
[662118.966255] [<ffffffff816b4f18>] ret_from_fork+0x58/0x90
[662118.967164] [<ffffffff810b08c0>] ? insert_kthread_work+0x40/0x40
[662118.968094] Code: 0f 1f 44 00 00 55 f6 05 17 6e fb ff 10 48 89 e5 41 57 41 56 41 55 41 54 49 89 fc 53 48 89 f3 4c 8b 36 0f 85 40 02 00 00 4d 89 f5 <41> 8b 8d 90 01 00 00 49 8b 85 38 03 00 00 85 c9 0f 85 0f 01 00
[662118.970130] RIP [<ffffffffc03bff4b>] nfsd4_cb_done+0x2b/0x310 [nfsd]
[662118.971108] RSP <ffff88003e147d60>
- The backtrace of the panicking task is as follows.
crash> set -p
PID: 915
COMMAND: "kworker/2:1"
TASK: ffff880231e52f70 [THREAD_INFO: ffff88003e144000]
CPU: 2
STATE: TASK_RUNNING (PANIC)
crash> bt
PID: 915 TASK: ffff880231e52f70 CPU: 2 COMMAND: "kworker/2:1"
#0 [ffff88003e147ae0] machine_kexec at ffffffff8105c4cb
#1 [ffff88003e147b40] __crash_kexec at ffffffff81104a32
#2 [ffff88003e147c10] crash_kexec at ffffffff81104b20
#3 [ffff88003e147c28] oops_end at ffffffff816ad278
#4 [ffff88003e147c50] die at ffffffff8102e97b
#5 [ffff88003e147c80] do_general_protection at ffffffff816acbfe
#6 [ffff88003e147cb0] general_protection at ffffffff816ac4a8
[exception RIP: nfsd4_cb_done+43]
RIP: ffffffffc03bff4b RSP: ffff88003e147d60 RFLAGS: 00010246
RAX: ffffffffc03bff20 RBX: ffff880014635bb8 RCX: dead000000000200
RDX: 0000000000000001 RSI: ffff880014635bb8 RDI: ffff88005299eb00
RBP: ffff88003e147d88 R8: ffff88005299eb48 R9: dffe3d98a599eb40
R10: dffe3d98a599eb40 R11: ffffea00003e4400 R12: ffff88005299eb00
R13: bfe58c81e3a881e3 R14: bfe58c81e3a881e3 R15: 0000000000000080
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#7 [ffff88003e147d90] rpc_exit_task at ffffffffc03469ee [sunrpc]
#8 [ffff88003e147da8] __rpc_execute at ffffffffc0348007 [sunrpc]
#9 [ffff88003e147e10] rpc_async_schedule at ffffffffc0348392 [sunrpc]
#10 [ffff88003e147e20] process_one_work at ffffffff810a881a
#11 [ffff88003e147e68] worker_thread at ffffffff810a94e6
#12 [ffff88003e147ec8] kthread at ffffffff810b098f
#13 [ffff88003e147f50] ret_from_fork at ffffffff816b4f18
- The kernel crashed while executing the following code (nfsd4_cb_sequence_done() is inlined into nfsd4_cb_done()).
1017 static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
1018 {
1019 struct nfsd4_callback *cb = calldata;
1020 struct nfs4_client *clp = cb->cb_clp;
1021
1022 dprintk("%s: minorversion=%d\n", __func__,
1023 clp->cl_minorversion);
1024
1025 if (!nfsd4_cb_sequence_done(task, cb))
||
||
\/
939 static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb)
940 {
941 struct nfs4_client *clp = cb->cb_clp;
942 struct nfsd4_session *session = clp->cl_cb_session;
943 bool ret = true;
944
945 if (!clp->cl_minorversion) { <<------------ The kernel crashed here.
...
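- The faulting instruction reads offset 0x190 from %r13, which is the offset of cl_minorversion, so %r13 should hold the struct nfs4_client pointer (clp).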
crash> nfs4_client.cl_minorversion -ox
struct nfs4_client {
[0x190] u32 cl_minorversion;
}
/usr/src/debug/kernel-3.10.0-693.1.1.el7/linux-3.10.0-693.1.1.el7.x86_64/fs/nfsd/nfs4callback.c: 945
0xffffffffc03bff4b <nfsd4_cb_done+43>: mov 0x190(%r13),%ecx
- R13 holds 0xbfe58c81e3a881e3, which is not a valid kernel address for a struct nfs4_client pointer (it is a non-canonical address). Dereferencing it therefore raised a general protection fault and the kernel crashed.
[exception RIP: nfsd4_cb_done+43]
RIP: ffffffffc03bff4b RSP: ffff88003e147d60 RFLAGS: 00010246
RAX: ffffffffc03bff20 RBX: ffff880014635bb8 RCX: dead000000000200
RDX: 0000000000000001 RSI: ffff880014635bb8 RDI: ffff88005299eb00
RBP: ffff88003e147d88 R8: ffff88005299eb48 R9: dffe3d98a599eb40
R10: dffe3d98a599eb40 R11: ffffea00003e4400 R12: ffff88005299eb00
R13: bfe58c81e3a881e3 R14: bfe58c81e3a881e3 R15: 0000000000000080
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
Environment
- Red Hat Enterprise Linux 7
Subscriber exclusive content
A Red Hat subscription provides unlimited access to our knowledgebase, tools, and much more.