Crash in scsi_dispatch_cmd on MRG Realtime kernel

Solution Unverified - Updated -

Issue

System crashes with the following kernel messages:

[282911.105301] BUG: unable to handle kernel NULL pointer dereference at 0000000000000038
[282911.105307] IP: [<ffffffff813ea4b1>] scsi_dispatch_cmd+0xb1/0x2a0
[282911.105308] PGD 1036a4a067 PUD 1036a49067 PMD 0
[282911.105310] Oops: 0000 [#1] PREEMPT SMP
[282911.105340] Modules linked in: altera_tse_qtcp(O) mpt3sas mpt2sas scsi_transport_sas mptctl mptbase dell_rbu nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss oid_registry nfsv4 nfs fscache lockd sunrpc grace ipv6 iTCO_wdt iTCO_vendor_support mxm_wmi dcdbas pcspkr sb_edac edac_core shpchp lpc_ich igb dca sg onload(O) onload_cplane(PO) sfc_char(O) sfc_resource(O) sfc_affinity(O) sfc(O) mtd ptp pps_core mdio ipmi_devintf ipmi_si ipmi_msghandler acpi_power_meter hwmon ext4 jbd2 mbcache sd_mod crc_t10dif crct10dif_common aesni_intel ablk_helper cryptd lrw gf128mul glue_helper aes_x86_64 megaraid_sas ahci libahci wmi mgag200 ttm drm_kms_helper drm i2c_algo_bit sysimgblt sysfillrect i2c_core syscopyarea dm_mirror dm_region_hash dm_log dm_mod [last unloaded: altera_tse_qtcp]
[282911.105342] CPU: 1 PID: 18134 Comm: kworker/u386:1 Tainted: P           O   ------------   3.10.0-327.rt56.198.el6rt.x86_64 #1
[282911.105343] Hardware name: Dell Inc. PowerEdge R630/02C2CP, BIOS 2.3.4 11/08/2016
[282911.105348] Workqueue: writeback bdi_writeback_workfn (flush-253:5)
[282911.105348] task: ffff8810357018c0 ti: ffff881035a34000 task.ti: ffff881035a34000
[282911.105350] RIP: 0010:[<ffffffff813ea4b1>]  [<ffffffff813ea4b1>] scsi_dispatch_cmd+0xb1/0x2a0
[282911.105351] RSP: 0018:ffff881035a37758  EFLAGS: 00010246
[282911.105351] RAX: 0000000000000000 RBX: ffff8810357d8780 RCX: 000000000000000a
[282911.105352] RDX: 00000000813ee3b8 RSI: ffff8810357d8780 RDI: ffff8810357d9980
[282911.105352] RBP: ffff881035a37778 R08: 0000000000009200 R09: 0000000000000004
[282911.105353] R10: 0000000000000000 R11: 0000000000000002 R12: ffff880037b3c000
[282911.105353] R13: ffff8810357d9980 R14: ffff88082f1f34e0 R15: 0000000000000000
[282911.105354] FS:  0000000000000000(0000) GS:ffff88105de00000(0000) knlGS:0000000000000000
[282911.105354] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[282911.105354] CR2: 0000000000000038 CR3: 0000001036a47000 CR4: 00000000003407e0
[282911.105355] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[282911.105355] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[282911.105356] Stack:
[282911.105359]  ffffffff813f380a ffff880831975000 ffff880037b3c000 ffff881034fc34e0
[282911.105360]  ffff881035a37818 ffffffff813f3a06 ffff881034fc34e0 0000000000000000
[282911.105361]  0000000000000000 ffff8810357d8780 0000000000000000 ffff880037b3c208
[282911.105361] Call Trace:
[282911.105365]  [<ffffffff813f380a>] ? scsi_request_fn+0x30a/0x6d0
[282911.105367]  [<ffffffff813f3a06>] scsi_request_fn+0x506/0x6d0
[282911.105372]  [<ffffffff81296757>] __blk_run_queue+0x37/0x50
[282911.105373]  [<ffffffff81298b9e>] queue_unplugged+0x4e/0x100
[282911.105374]  [<ffffffff81299a63>] blk_flush_plug_list+0x1c3/0x1f0
[282911.105376]  [<ffffffff81299aa8>] blk_finish_plug+0x18/0x50
[282911.105389]  [<ffffffffa01b2b20>] ext4_writepages+0x410/0x780 [ext4]
[282911.105393]  [<ffffffff8110dcfc>] ? acct_account_cputime+0x1c/0x20
[282911.105396]  [<ffffffff810c06b8>] ? tick_nohz_full_kick+0x48/0x50
[282911.105399]  [<ffffffff815f322e>] ? __schedule+0x43e/0x800
[282911.105403]  [<ffffffff81155963>] do_writepages+0x23/0x40
[282911.105404]  [<ffffffff811daa49>] __writeback_single_inode+0x49/0x210
[282911.105408]  [<ffffffff81099bea>] ? migrate_enable+0x15a/0x220
[282911.105409]  [<ffffffff811db26b>] writeback_sb_inodes+0x32b/0x400
[282911.105412]  [<ffffffff810c6aec>] ? rt_down_read_trylock+0x2c/0x60
[282911.105413]  [<ffffffff811db3de>] __writeback_inodes_wb+0x9e/0xd0
[282911.105414]  [<ffffffff811db723>] wb_writeback+0x263/0x3c0
[282911.105415]  [<ffffffff811db9fb>] wb_do_writeback+0x17b/0x1f0
[282911.105416]  [<ffffffff811dbb00>] bdi_writeback_workfn+0x90/0x230
[282911.105417]  [<ffffffff81099b6b>] ? migrate_enable+0xdb/0x220
[282911.105419]  [<ffffffff81081ff1>] process_one_work+0x191/0x520
[282911.105420]  [<ffffffff810837dc>] worker_thread+0x16c/0x3f0
[282911.105421]  [<ffffffff81083670>] ? manage_workers+0x130/0x130
[282911.105422]  [<ffffffff81083670>] ? manage_workers+0x130/0x130
[282911.105425]  [<ffffffff81089dfe>] kthread+0xbe/0xd0
[282911.105426]  [<ffffffff81089d40>] ? kthreadd+0x1d0/0x1d0
[282911.105430]  [<ffffffff815fda08>] ret_from_fork+0x58/0x90
[282911.105431]  [<ffffffff81089d40>] ? kthreadd+0x1d0/0x1d0

The kernel stack trace at the time of the panic, as shown by the crash utility, is:

crash> bt
PID: 18134  TASK: ffff8810357018c0  CPU: 1   COMMAND: "kworker/u386:1"
 #0 [ffff881035a36670] die at ffffffff8100799b
 #1 [ffff881035a366a0] do_trap at ffffffff815f6b1b
 #2 [ffff881035a36700] do_invalid_op at ffffffff81004ef5
 #3 [ffff881035a367b0] invalid_op at ffffffff815feff8
    [exception RIP: change_page_attr_set_clr+0x4a6]
    RIP: ffffffff8104e7d6  RSP: ffff881035a36868  RFLAGS: 00010046
    RAX: 0000000000000002  RBX: 0000000000000200  RCX: 0000000000000010
    RDX: 0000000000061f76  RSI: 0000000080000000  RDI: 0000000080000000
    RBP: ffff881035a36918   R8: 0000000000000001   R9: 0000000000000000
    R10: 0000000000000000  R11: 0000000000000001  R12: 0000000000000005
    R13: 0000000000000010  R14: 0000000000000000  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #4 [ffff881035a36920] _set_pages_array at ffffffff8104eaba
 #5 [ffff881035a36980] set_pages_array_wc at ffffffff8104eb23
 #6 [ffff881035a36990] ttm_set_pages_caching at ffffffffa00b3b8e [ttm]
 #7 [ffff881035a369b0] ttm_alloc_new_pages at ffffffffa00b4146 [ttm]
 #8 [ffff881035a36a10] ttm_page_pool_fill_locked.clone.1 at ffffffffa00b42b4 [ttm]
 #9 [ffff881035a36a70] ttm_page_pool_get_pages.clone.2 at ffffffffa00b43b0 [ttm]
#10 [ffff881035a36ab0] ttm_get_pages.clone.0 at ffffffffa00b44fb [ttm]
#11 [ffff881035a36b10] ttm_pool_populate at ffffffffa00b46ec [ttm]
#12 [ffff881035a36b50] mgag200_ttm_tt_populate at ffffffffa00f9f4e [mgag200]
#13 [ffff881035a36b60] ttm_bo_move_memcpy at ffffffffa00b0bc0 [ttm]
#14 [ffff881035a36c70] mgag200_bo_move at ffffffffa00f9e78 [mgag200]
#15 [ffff881035a36c80] ttm_bo_handle_move_mem at ffffffffa00acdd9 [ttm]
#16 [ffff881035a36d20] ttm_bo_validate at ffffffffa00af5de [ttm]
#17 [ffff881035a36dd0] mgag200_bo_push_sysram at ffffffffa00fa043 [mgag200]
#18 [ffff881035a36df0] mga_crtc_do_set_base.clone.0 at ffffffffa00f6dbf [mgag200]
#19 [ffff881035a36e50] mga_crtc_mode_set at ffffffffa00f7a61 [mgag200]
#20 [ffff881035a36f00] drm_crtc_helper_set_mode at ffffffffa00c4999 [drm_kms_helper]
#21 [ffff881035a37160] drm_crtc_helper_set_config at ffffffffa00c5a49 [drm_kms_helper]
#22 [ffff881035a37200] drm_mode_set_config_internal at ffffffffa006c1e6 [drm]
#23 [ffff881035a37230] drm_fb_helper_pan_display at ffffffffa00d0841 [drm_kms_helper]
#24 [ffff881035a37280] fb_pan_display at ffffffff8131f0c6
#25 [ffff881035a372a0] bit_update_start at ffffffff81330d4a
#26 [ffff881035a372c0] fbcon_switch at ffffffff8132cf56
#27 [ffff881035a373b0] redraw_screen at ffffffff81395a89
#28 [ffff881035a373e0] fbcon_blank at ffffffff8132cb1a
#29 [ffff881035a374e0] do_unblank_screen at ffffffff81396568
#30 [ffff881035a37500] unblank_screen at ffffffff813966a0
#31 [ffff881035a37510] bust_spinlocks at ffffffff812cae1d
#32 [ffff881035a37520] oops_end at ffffffff815f72d0
#33 [ffff881035a37550] no_context at ffffffff8104b9fe
#34 [ffff881035a375a0] __bad_area_nosemaphore at ffffffff8104bbed
#35 [ffff881035a375f0] bad_area_nosemaphore at ffffffff8104bd03
#36 [ffff881035a37600] __do_page_fault at ffffffff815f977f
#37 [ffff881035a37670] do_page_fault at ffffffff815f9bcd
#38 [ffff881035a376a0] page_fault at ffffffff815f65f2
    [exception RIP: scsi_dispatch_cmd+0xb1]
    RIP: ffffffff813ea4b1  RSP: ffff881035a37758  RFLAGS: 00010246
    RAX: 0000000000000000  RBX: ffff8810357d8780  RCX: 000000000000000a
    RDX: 00000000813ee3b8  RSI: ffff8810357d8780  RDI: ffff8810357d9980
    RBP: ffff881035a37778   R8: 0000000000009200   R9: 0000000000000004
    R10: 0000000000000000  R11: 0000000000000002  R12: ffff880037b3c000
    R13: ffff8810357d9980  R14: ffff88082f1f34e0  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
#39 [ffff881035a37780] scsi_request_fn at ffffffff813f3a06
#40 [ffff881035a37820] __blk_run_queue at ffffffff81296757
#41 [ffff881035a37840] queue_unplugged at ffffffff81298b9e
#42 [ffff881035a37880] blk_flush_plug_list at ffffffff81299a63
#43 [ffff881035a378f0] blk_finish_plug at ffffffff81299aa8
#44 [ffff881035a37910] ext4_writepages at ffffffffa01b2b20 [ext4]
#45 [ffff881035a37ab0] do_writepages at ffffffff81155963
#46 [ffff881035a37ac0] __writeback_single_inode at ffffffff811daa49
#47 [ffff881035a37b10] writeback_sb_inodes at ffffffff811db26b
#48 [ffff881035a37be0] __writeback_inodes_wb at ffffffff811db3de
#49 [ffff881035a37c30] wb_writeback at ffffffff811db723
#50 [ffff881035a37cf0] wb_do_writeback at ffffffff811db9fb
#51 [ffff881035a37d90] bdi_writeback_workfn at ffffffff811dbb00
#52 [ffff881035a37df0] process_one_work at ffffffff81081ff1
#53 [ffff881035a37e40] worker_thread at ffffffff810837dc
#54 [ffff881035a37ec0] kthread at ffffffff81089dfe
#55 [ffff881035a37f50] ret_from_fork at ffffffff815fda08

The important section of the kernel panic stack trace runs from frame #32 to frame #55. The frames listed above frame #32 (frames #0 through #31) are a side effect of the primary panic/oops: a nested trap occurred while the kernel was printing the panic messages to the console.

Environment

  • Red Hat Enterprise MRG 2.5.30 Realtime Kernel
  • Kernel version: 3.10.0-327.rt56.198.el6rt.x86_64
  • SCSI host controlled by the Avago SAS-based MegaRAID driver

Subscriber exclusive content

A Red Hat subscription provides unlimited access to our knowledgebase, tools, and much more.

Current Customers and Partners

Log in for full access

Log In

New to Red Hat?

Learn more about Red Hat subscriptions

Using a Red Hat product through a public cloud?

How to access this content