CPU のうちの 1 つが nouveau モジュールドライバの IO オペレーションで停止し、ハードロックアップが発生する
Issue
- システムがクラッシュしました。
 - 以下のようなメッセージがカーネルのリングバッファに出力されました。
 
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0:ACQUIRE
[7560994.819432] nouveau E[   PFIFO][0000:02:00.0] PBDMA0: ch 2 [Xorg[5542]] subc 0 mthd 0x001c data 0x00001004
[7560994.819432] Hardware name: Dell Inc. Precision Tower 5810/0K240Y, BIOS A08 07/31/2015
[7560994.819432]  ffffffff818674b8 00000000b52d055a ffff88045fd05af0 ffffffff816351f1
[7560994.819432]  ffff88045fd05b70 ffffffff8162ea6c 0000000000000010 ffff88045fd05b80
[7560994.819432]  ffff88045fd05b20 00000000b52d055a ffffffff8101cd69 0000000000000002
[7560994.819432] Call Trace:
[7560994.819432]  <NMI>  [<ffffffff816351f1>] dump_stack+0x19/0x1b
[7560994.819432]  [<ffffffff8162ea6c>] panic+0xd8/0x1e7
[7560994.819432]  [<ffffffff8101cd69>] ? sched_clock+0x9/0x10
[7560994.819432]  [<ffffffff8111b450>] ? restart_watchdog_hrtimer+0x50/0x50
[7560994.819432]  [<ffffffff8111b512>] watchdog_overflow_callback+0xc2/0xd0
[7560994.819432]  [<ffffffff8115ed71>] __perf_event_overflow+0xa1/0x250
[7560994.819432]  [<ffffffff8115f844>] perf_event_overflow+0x14/0x20
[7560994.819432]  [<ffffffff810325a8>] intel_pmu_handle_irq+0x1e8/0x470
[7560994.819432]  [<ffffffff8101cd15>] ? native_sched_clock+0x35/0x80
[7560994.819432]  [<ffffffff810bb45d>] ? sched_clock_local+0x1d/0x80
[7560994.819432]  [<ffffffff8163ebeb>] perf_event_nmi_handler+0x2b/0x50
[7560994.819432]  [<ffffffff8163e339>] nmi_handle.isra.0+0x69/0xb0
[7560994.819432]  [<ffffffff8163e4e9>] do_nmi+0x169/0x340
[7560994.819432]  [<ffffffff8163d771>] end_repeat_nmi+0x1e/0x2e
[7560994.819432]  [<ffffffff810d8a20>] ? get_monotonic_boottime+0xb0/0x100
[7560994.819432]  [<ffffffff810d8a20>] ? get_monotonic_boottime+0xb0/0x100
[7560994.819432]  [<ffffffff810d8a20>] ? get_monotonic_boottime+0xb0/0x100
[7560994.819432]  <<EOE>>  [<ffffffff810a3d91>] posix_get_boottime+0x11/0x20
[7560994.819432]  [<ffffffff810a5464>] SyS_clock_gettime+0x54/0xc0
[7560994.819432]  [<ffffffff81645909>] system_call_fastpath+0x16/0x1b
- クラッシュした際、カーネルリングバッファ中に hard LOCKUP メッセージは確認できませんが、ハードロックアップが発生したようです。
- バックトレース:
 
crash> bt -a
PID:0      TASK: ffffffff81951440  CPU:0   COMMAND:"swapper/0"
 #0 [ffff88045fc05af8] panic at ffffffff8162e9f0
 #1 [ffff88045fc05b78] watchdog_overflow_callback at ffffffff8111b512
 #2 [ffff88045fc05b88] __perf_event_overflow at ffffffff8115ed71
 #3 [ffff88045fc05c00] perf_event_overflow at ffffffff8115f844
 #4 [ffff88045fc05c10] intel_pmu_handle_irq at ffffffff810325a8
 #5 [ffff88045fc05e60] perf_event_nmi_handler at ffffffff8163ebeb
 #6 [ffff88045fc05e80] nmi_handle at ffffffff8163e339
 #7 [ffff88045fc05ec8] do_nmi at ffffffff8163e450
 #8 [ffff88045fc05ef0] end_repeat_nmi at ffffffff8163d771
    [exception RIP: ioread32+66]
    RIP: ffffffff813095d2  RSP: ffff88045fc03d80  RFLAGS:00000092
    RAX:000000008000001d  RBX:0000000000000000  RCX: ffff880035eea508
    RDX: ffffffffa0257980  RSI:0000000000000016  RDI: ffffc900121400c0
    RBP: ffff88045fc03e68   R8:0000000000000000   R9: ffff88044ec00000
    R10:0000000000000000  R11:0000000000000000  R12:0000000004000000
    R13:0000000000040120  R14: ffff880035eea400  R15:00000000000400c0
    ORIG_RAX: ffffffffffffffff  CS:0010  SS:0018
--- <NMI exception stack> ---
 #9 [ffff88045fc03d80] ioread32 at ffffffff813095d2
#10 [ffff88045fc03d80] gk104_fifo_intr at ffffffffa0257caf [nouveau]
#11 [ffff88045fc03e70] nvkm_mc_intr at ffffffffa0230815 [nouveau]
#12 [ffff88045fc03eb0] handle_irq_event_percpu at ffffffff8111c2be
#13 [ffff88045fc03ef8] handle_irq_event at ffffffff8111c49d
#14 [ffff88045fc03f20] handle_edge_irq at ffffffff8111f137
#15 [ffff88045fc03f40] handle_irq at ffffffff81016ecf
#16 [ffff88045fc03f78] do_IRQ at ffffffff81647daf
--- <IRQ stack> ---
#17 [ffffffff8193fe28] ret_from_intr at ffffffff8163d0ed
    [exception RIP: tick_nohz_idle_enter+68]
    RIP: ffffffff810e1034  RSP: ffffffff8193fed0  RFLAGS:00000202
    RAX:00000002c2a30af7  RBX:001adc722f68bf10  RCX:0000000000000000
    RDX:00000000004b0a31  RSI:0000000000000086  RDI:0000000000000086
    RBP: ffffffff8193fed0   R8:0000000000000000   R9:0000000000000000
    R10:0000000000000000  R11:0000000000000000  R12: ffffffff810e0a38
    R13: ffffffff8193fe90  R14: ffffffff810a9c62  R15: ffffffff8193fe38
    ORIG_RAX: ffffffffffffff2d  CS:0010  SS:0018
#18 [ffffffff8193fed8] cpu_startup_entry at ffffffff810d615e
#19 [ffffffff8193ff30] rest_init at ffffffff81624e07
#20 [ffffffff8193ff40] start_kernel at ffffffff81a8d057
#21 [ffffffff8193ff88] x86_64_start_reservations at ffffffff81a8c5ee
#22 [ffffffff8193ff98] x86_64_start_kernel at ffffffff81a8c742
PID:0      TASK: ffff880449655080  CPU:1   COMMAND:"swapper/1"
 #0 [ffff88045fc85e70] crash_nmi_callback at ffffffff810458f2
 #1 [ffff88045fc85e80] nmi_handle at ffffffff8163e339
 #2 [ffff88045fc85ec8] do_nmi at ffffffff8163e450
 #3 [ffff88045fc85ef0] end_repeat_nmi at ffffffff8163d771
    [exception RIP: intel_idle+215]
    RIP: ffffffff8135de17  RSP: ffff88044967fe10  RFLAGS:00000046
    RAX:0000000000000020  RBX:0000000000000008  RCX:0000000000000001
    RDX:0000000000000000  RSI: ffff88044967ffd8  RDI:000000000194a000
    RBP: ffff88044967fe40   R8:000000000fa7d767   R9: 0000000000000018
    R10:0000000000046cbf  R11:000000000000001e  R12: ffff88044967ffd8
    R13:0000000000000004  R14:0000000000000020  R15: ffffffff819fdeb8
    ORIG_RAX: ffffffffffffffff  CS:0010  SS:0018
--- <NMI exception stack> ---
 #4 [ffff88044967fe10] intel_idle at ffffffff8135de17
 #5 [ffff88044967fe48] cpuidle_enter_state at ffffffff814d4540
 #6 [ffff88044967fe80] cpuidle_idle_call at ffffffff814d4699
 #7 [ffff88044967fec0] arch_cpu_idle at ffffffff8101e4be
 #8 [ffff88044967fed0] cpu_startup_entry at ffffffff810d6305
 #9 [ffff88044967ff28] start_secondary at ffffffff810475fa
PID:572    TASK: ffff880443c25080  CPU:2   COMMAND:"systemd-journal"
 #0 [ffff88045fd059c8] machine_kexec at ffffffff81051beb
 #1 [ffff88045fd05a28] crash_kexec at ffffffff810f2542
 #2 [ffff88045fd05af8] panic at ffffffff8162ea73
 #3 [ffff88045fd05b78] watchdog_overflow_callback at ffffffff8111b512
 #4 [ffff88045fd05b88] __perf_event_overflow at ffffffff8115ed71
 #5 [ffff88045fd05c00] perf_event_overflow at ffffffff8115f844
 #6 [ffff88045fd05c10] intel_pmu_handle_irq at ffffffff810325a8
 #7 [ffff88045fd05e60] perf_event_nmi_handler at ffffffff8163ebeb
 #8 [ffff88045fd05e80] nmi_handle at ffffffff8163e339
 #9 [ffff88045fd05ec8] do_nmi at ffffffff8163e4e9
#10 [ffff88045fd05ef0] end_repeat_nmi at ffffffff8163d771
    [exception RIP: get_monotonic_boottime+176]
    RIP: ffffffff810d8a20  RSP: ffff880447ec3f10  RFLAGS:00000216
    RAX:69cf8c6df0c1c509  RBX: ffffffff81965380  RCX:0000000000000000
    RDX:00000000850ed66b  RSI: ffffffc313319b93  RDI: ffffffffa8e13a9e
    RBP: ffff880447ec3f38   R8:0000000000735f30   R9:0000000000000000
    R10:0000000000000002  R11:0000000000000202  R12:0000000000735e0f
    R13:0000000000000000  R14: ffff880447ec3f58  R15:000000008ffabd80
    ORIG_RAX: ffffffffffffffff  CS:0010  SS:0018
--- <NMI exception stack> ---
#11 [ffff880447ec3f10] get_monotonic_boottime at ffffffff810d8a20
#12 [ffff880447ec3f40] posix_get_boottime at ffffffff810a3d91
#13 [ffff880447ec3f50] sys_clock_gettime at ffffffff810a5464
#14 [ffff880447ec3f80] system_call_fastpath at ffffffff81645909
    RIP:00007ffd0f3a87c2  RSP:00007ffd0f2a0420  RFLAGS:00000246
    RAX:00000000000000e4  RBX: ffffffff81645909  RCX:0000000000000000
    RDX:0000000000000000  RSI:00007ffd0f2a0400  RDI:0000000000000007
    RBP:00007ffd0f2a03e0   R8:0000000000735f30   R9:0000000000000000
    R10:0000000000000002  R11:0000000000000202  R12:00007ffd0f2a0430
    R13:00000000b52d055a  R14: ffffffffffffffff  R15:0000000000000001
    ORIG_RAX:00000000000000e4  CS:0033  SS:002b
PID:0      TASK: ffff880449656780  CPU:3   COMMAND:"swapper/3"
 #0 [ffff88045fd85e70] crash_nmi_callback at ffffffff810458f2
 #1 [ffff88045fd85e80] nmi_handle at ffffffff8163e339
 #2 [ffff88045fd85ec8] do_nmi at ffffffff8163e450
 #3 [ffff88045fd85ef0] end_repeat_nmi at ffffffff8163d771
    [exception RIP: intel_idle+215]
    RIP: ffffffff8135de17  RSP: ffff88044968be10  RFLAGS:00000046
    RAX:0000000000000020  RBX:0000000000000008  RCX:0000000000000001
    RDX:0000000000000000  RSI: ffff88044968bfd8  RDI:0000000000000003
    RBP: ffff88044968be40   R8:0000000020c49b8f   R9:000000000000001c
    R10:0000000010708b19  R11:000000000000001e  R12: ffff88044968bfd8
    R13:0000000000000004  R14:0000000000000020  R15: ffffffff819fdeb8
    ORIG_RAX: ffffffffffffffff  CS:0010  SS:0018
--- <NMI exception stack> ---
 #4 [ffff88044968be10] intel_idle at ffffffff8135de17
 #5 [ffff88044968be48] cpuidle_enter_state at ffffffff814d4540
 #6 [ffff88044968be80] cpuidle_idle_call at ffffffff814d4699
 #7 [ffff88044968bec0] arch_cpu_idle at ffffffff8101e4be
 #8 [ffff88044968bed0] cpu_startup_entry at ffffffff810d6305
 #9 [ffff88044968bf28] start_secondary at ffffffff810475fa
  Environment
- Red Hat Enterprise Linux 7.2 (kernel-3.10.0-327.el7)
 - カーネルのインボックス nouveau ドライバー
 
Subscriber exclusive content
A Red Hat subscription provides unlimited access to our knowledgebase, tools, and much more.