Hard lockup occurs due to an infinite loop encountered in distribute_cfs_runtime()

Solution Verified - Updated -

Issue

  • A hard lockup occurs due to an infinite loop in distribute_cfs_runtime(), and the kernel panics with the following messages:
[ 1432.242810] Kernel panic - not syncing: Hard LOCKUP
[ 1432.242829] CPU: 25 PID: 0 Comm: swapper/25 Not tainted 3.10.0-514.el7.x86_64 #1
[ 1432.242855] Hardware name: Cisco Systems Inc UCSC-C220-M4S/UCSC-C220-M4S, BIOS C220M4.2.0.3d.0.111120141447 11/11/2014
[ 1432.242891]  ffffffff818d9764 b7b1a7a2bef0fc23 ffff88105f445b18 ffffffff81685eac
[ 1432.242921]  ffff88105f445b98 ffffffff8167f2b3 0000000000000010 ffff88105f445ba8
[ 1432.242951]  ffff88105f445b48 b7b1a7a2bef0fc23 ffff88105f445ba8 ffffffff818d946a
[ 1432.242980] Call Trace:
[ 1432.242991]  <NMI>  [<ffffffff81685eac>] dump_stack+0x19/0x1b
[ 1432.243018]  [<ffffffff8167f2b3>] panic+0xe3/0x1f2
[ 1432.243039]  [<ffffffff8108562f>] nmi_panic+0x3f/0x40
[ 1432.243059]  [<ffffffff8112f0e6>] watchdog_overflow_callback+0xf6/0x100
[ 1432.243085]  [<ffffffff8117465e>] __perf_event_overflow+0x8e/0x1f0
[ 1432.243108]  [<ffffffff811752a4>] perf_event_overflow+0x14/0x20
[ 1432.243132]  [<ffffffff81009d88>] intel_pmu_handle_irq+0x1f8/0x4e0
[ 1432.243156]  [<ffffffff81319d7c>] ? ioremap_page_range+0x27c/0x3e0
[ 1432.243179]  [<ffffffff811bedf4>] ? vunmap_page_range+0x1c4/0x310
[ 1432.243202]  [<ffffffff811bef51>] ? unmap_kernel_range_noflush+0x11/0x20
[ 1432.243227]  [<ffffffff813c93d4>] ? ghes_copy_tofrom_phys+0x124/0x210
[ 1432.243252]  [<ffffffff813c9560>] ? ghes_read_estatus+0xa0/0x190
[ 1432.243275]  [<ffffffff8168daeb>] perf_event_nmi_handler+0x2b/0x50
[ 1432.243298]  [<ffffffff8168ef19>] nmi_handle.isra.0+0x69/0xb0
[ 1432.243320]  [<ffffffff8168f093>] do_nmi+0x133/0x410
[ 1432.243339]  [<ffffffff8168e353>] end_repeat_nmi+0x1e/0x2e
[ 1432.243360]  [<ffffffff8168d812>] ? _raw_spin_lock+0x32/0x50
[ 1432.243381]  [<ffffffff8168d812>] ? _raw_spin_lock+0x32/0x50
[ 1432.243402]  [<ffffffff8168d812>] ? _raw_spin_lock+0x32/0x50
[ 1432.243422]  <<EOE>>  <IRQ>  [<ffffffff810d100b>] unthrottle_cfs_rq+0x4b/0x170
[ 1432.243453]  [<ffffffff810d12e2>] distribute_cfs_runtime+0xf2/0x100
[ 1432.243476]  [<ffffffff810d147f>] sched_cfs_period_timer+0xcf/0x160
[ 1432.243499]  [<ffffffff810d13b0>] ? sched_cfs_slack_timer+0xc0/0xc0
[ 1432.243523]  [<ffffffff810b4862>] __hrtimer_run_queues+0xd2/0x260
[ 1432.243546]  [<ffffffff810b4e00>] hrtimer_interrupt+0xb0/0x1e0
[ 1432.243569]  [<ffffffff810510d7>] local_apic_timer_interrupt+0x37/0x60
[ 1432.243594]  [<ffffffff81698bcf>] smp_apic_timer_interrupt+0x3f/0x60
[ 1432.243617]  [<ffffffff8169711d>] apic_timer_interrupt+0x6d/0x80
[ 1432.243638]  <EOI>  [<ffffffff81513f52>] ? cpuidle_enter_state+0x52/0xc0
[ 1432.243665]  [<ffffffff81514099>] cpuidle_idle_call+0xd9/0x210
[ 1432.243688]  [<ffffffff8103516e>] arch_cpu_idle+0xe/0x30
[ 1432.243709]  [<ffffffff810e7c95>] cpu_startup_entry+0x245/0x290
[ 1432.243732]  [<ffffffff8104f12a>] start_secondary+0x1ba/0x230
  • The following backtrace was observed from the panicking task:
crash> bt
PID: 0      TASK: ffff8808fce38fb0  CPU: 25  COMMAND: "swapper/25"
 #0 [ffff88105f4459f0] machine_kexec at ffffffff81059cdb
 #1 [ffff88105f445a50] __crash_kexec at ffffffff81105182
 #2 [ffff88105f445b20] panic at ffffffff8167f2ba
 #3 [ffff88105f445ba0] nmi_panic at ffffffff8108562f
 #4 [ffff88105f445bb0] watchdog_overflow_callback at ffffffff8112f0e6
 #5 [ffff88105f445bc8] __perf_event_overflow at ffffffff8117465e
 #6 [ffff88105f445c00] perf_event_overflow at ffffffff811752a4
 #7 [ffff88105f445c10] intel_pmu_handle_irq at ffffffff81009d88
 #8 [ffff88105f445e48] perf_event_nmi_handler at ffffffff8168daeb
 #9 [ffff88105f445e68] nmi_handle at ffffffff8168ef19
#10 [ffff88105f445eb0] do_nmi at ffffffff8168f093
#11 [ffff88105f445ef0] end_repeat_nmi at ffffffff8168e353
    [exception RIP: _raw_spin_lock+50]
    RIP: ffffffff8168d812  RSP: ffff88105f443e18  RFLAGS: 00000012
    RAX: 0000000000007f72  RBX: ffff88105aab1b00  RCX: 0000000000003f34
    RDX: 0000000000003f42  RSI: 0000000000003f42  RDI: ffff881058a79d48
    RBP: ffff88105f443e18   R8: 0000000000800008   R9: 0000000000000001
    R10: 0000000000018695  R11: 0000000000000000  R12: ffff8810537cd400
    R13: ffff88105f2d6c40  R14: ffff881058a79c00  R15: ffff881058a79d48
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
--- <NMI exception stack> ---
#12 [ffff88105f443e18] _raw_spin_lock at ffffffff8168d812
#13 [ffff88105f443e20] unthrottle_cfs_rq at ffffffff810d100b
#14 [ffff88105f443e58] distribute_cfs_runtime at ffffffff810d12e2
#15 [ffff88105f443ea0] sched_cfs_period_timer at ffffffff810d147f
#16 [ffff88105f443ed8] __hrtimer_run_queues at ffffffff810b4862
#17 [ffff88105f443f30] hrtimer_interrupt at ffffffff810b4e00
#18 [ffff88105f443f80] local_apic_timer_interrupt at ffffffff810510d7
#19 [ffff88105f443f98] smp_apic_timer_interrupt at ffffffff81698bcf
#20 [ffff88105f443fb0] apic_timer_interrupt at ffffffff8169711d
--- <IRQ stack> ---
#21 [ffff8808fce47da8] apic_timer_interrupt at ffffffff8169711d
    [exception RIP: cpuidle_enter_state+82]
    RIP: ffffffff81513f52  RSP: ffff8808fce47e50  RFLAGS: 00000206
    RAX: 0000014aa48e9400  RBX: 000000000000f8a0  RCX: 0000000000000018
    RDX: 0000000225c17d03  RSI: ffff8808fce47fd8  RDI: 0000014aa48e9400
    RBP: ffff8808fce47e78   R8: 000000000000608f   R9: 0000000000000018
    R10: 0000000000018695  R11: 0000000000000000  R12: ffff8808fce47e20
    R13: ffff88105f44f8e0  R14: 0000000000000082  R15: ffff88105f44f8e0
    ORIG_RAX: ffffffffffffff10  CS: 0010  SS: 0018
#22 [ffff8808fce47e80] cpuidle_idle_call at ffffffff81514099
#23 [ffff8808fce47ec0] arch_cpu_idle at ffffffff8103516e
#24 [ffff8808fce47ed0] cpu_startup_entry at ffffffff810e7c95
#25 [ffff8808fce47f28] start_secondary at ffffffff8104f12a

Environment

  • Red Hat Enterprise Linux 7.3 (kernel-3.10.0-514.el7.x86_64)

Subscriber exclusive content

A Red Hat subscription provides unlimited access to our knowledgebase, tools, and much more.

Current Customers and Partners

Log in for full access

Log In

New to Red Hat?

Learn more about Red Hat subscriptions

Using a Red Hat product through a public cloud?

How to access this content