The real-time kernel (kernel-rt) panics in uio_event_notify() due to an issue with the unsigned igb_uio driver.
Environment
- Red Hat Enterprise Linux 8 for Real Time
- Third-party kernel module igb_uio
Issue
- The real-time kernel panics in the context of the uio_event_notify() function and shows the message below.
BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
PGD 0
Oops: 0010 [#1] PREEMPT_RT SMP NOPTI
CPU: 0 PID: 0 Comm: swapper/0 Kdump: loaded Tainted: G W OE --------- - - 4.18.0-372.32.1.rt7.189.el8_6.x86_64 #1
RIP: 0010:0x0
Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6.
..
Call Trace:
<IRQ>
__do_set_cpus_allowed+0x16c/0x180
__set_cpus_allowed_ptr+0x11e/0x170
migrate_enable+0xb8/0x380
? __wake_up_common+0x7a/0x190
rt_spin_unlock+0xe/0x40
__wake_up_common_lock+0x81/0xb0
uio_event_notify+0x28/0x40 [uio]
igbuio_pci_irqhandler+0x1a/0x40 [igb_uio]
__handle_irq_event_percpu+0x4c/0x240
? cpuidle_enter_state+0xe5/0x470
handle_irq_event_percpu+0x55/0xa0
? _raw_spin_unlock_irqrestore+0x20/0x60
handle_irq_event+0x58/0x9d
handle_edge_irq+0xb3/0x250
handle_irq+0x1f/0x30
do_IRQ+0x79/0x130
common_interrupt+0xf/0xf
</IRQ>
Resolution
- Engage the provider of the igb_uio module for further investigation of this issue.
- Check with them whether a newer patch is available that fixes the issue.
Root Cause
- The issue is caused by a third-party DPDK module, igb_uio. The igbuio_pci_irqhandler() function from this module calls uio_event_notify() in interrupt context. This is incorrect on the real-time kernel because uio_event_notify() may sleep, which is not allowed in interrupt context.
Diagnostic Steps
- Here is the backtrace of the panicking task.
crash> bt
PID: 0 TASK: ffffffffb5e22840 CPU: 0 COMMAND: "swapper/0"
#0 [ff5640c0c0003ad8] machine_kexec at ffffffffb486105e
#1 [ff5640c0c0003b30] __crash_kexec at ffffffffb49adf1d
#2 [ff5640c0c0003bf8] crash_kexec at ffffffffb49af284
#3 [ff5640c0c0003c08] oops_end at ffffffffb481e758
#4 [ff5640c0c0003c28] no_context at ffffffffb4873108
#5 [ff5640c0c0003c80] __bad_area_nosemaphore at ffffffffb4873474
#6 [ff5640c0c0003cc8] do_page_fault at ffffffffb4873e44
#7 [ff5640c0c0003d00] page_fault at ffffffffb520105e
[exception RIP: unknown or invalid address]
RIP: 0000000000000000 RSP: ff5640c0c0003db0 RFLAGS: 00010002
RAX: 0000000000000000 RBX: ffffffffb5e22840 RCX: 0000000000000001
RDX: 000000000000000a RSI: ffffffffb5e22840 RDI: ff2ab4cd3f02a040
RBP: ff2ab4cd3f02a040 R8: ffffffffb5e09380 R9: ffffffffb63e771c
R10: ffffffffb63e7718 R11: 0000000000000000 R12: 0000000000000004
R13: ffffffffb5e22840 R14: ffffffffb5e22c08 R15: ffffffffb5e22c08
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#8 [ff5640c0c0003db0] __do_set_cpus_allowed at ffffffffb491fe4c
#9 [ff5640c0c0003de0] __set_cpus_allowed_ptr at ffffffffb492187e
#10 [ff5640c0c0003e30] migrate_enable at ffffffffb4922258
#11 [ff5640c0c0003e80] rt_spin_unlock at ffffffffb51d6bde
#12 [ff5640c0c0003e90] __wake_up_common_lock at ffffffffb493dc31
#13 [ff5640c0c0003ef0] uio_event_notify at ffffffffc0322088 [uio]
#14 [ff5640c0c0003f00] igbuio_pci_irqhandler at ffffffffc032c20a [igb_uio]
#15 [ff5640c0c0003f10] __handle_irq_event_percpu at ffffffffb495f4dc
#16 [ff5640c0c0003f50] handle_irq_event_percpu at ffffffffb495f725
#17 [ff5640c0c0003f80] handle_irq_event at ffffffffb495f7c8
#18 [ff5640c0c0003fa0] handle_edge_irq at ffffffffb49641f3
#19 [ff5640c0c0003fb8] handle_irq at ffffffffb481de0f
#20 [ff5640c0c0003fc0] do_IRQ at ffffffffb5201d39
- Looking at the stack of the panicking task, we can see that the igbuio_pci_irqhandler() function called uio_event_notify() in interrupt context.
- This is incorrect on the real-time kernel because uio_event_notify() may sleep, which is not allowed in interrupt context.
- The uio_event_notify() function may sleep because it calls spin_lock_irqsave(), which may sleep on a PREEMPT_RT kernel.
- The kernel reported that this happened; here is the stack trace from the log:
[179914.299116] BUG: scheduling while atomic: swapper/0/0/0x00010002
[179914.299120] Modules linked in: (...)
[179914.299203] Preemption disabled at:
[179914.299204] [<ffffffffb51d1ebe>] schedule_preempt_disabled+0x1e/0x30
[179914.299211]
[179914.299212] CPU: 0 PID: 0 Comm: swapper/0 Kdump: loaded Tainted: G OE --------- - - 4.18.0-372.32.1.rt7.189.el8_6.x86_64 #1
[179914.299215] Hardware name: RelianceJioInfocomm 5GCCDU/WHITLEY, BIOS 0CCDU500 02/16/2024
[179914.299217] Call Trace:
[179914.299220] <IRQ>
[179914.299223] dump_stack+0x41/0x60
[179914.299229] ? schedule_preempt_disabled+0x1e/0x30
[179914.299232] __schedule_bug.cold.104+0x87/0x94
[179914.299237] __schedule+0x593/0x9b0
[179914.299241] schedule_rtlock+0x14/0x40
[179914.299243] rtlock_slowlock_locked+0x2d7/0x5f0
[179914.299247] ? resched_curr+0x27/0x110
[179914.299251] rt_spin_lock+0x3c/0x50
[179914.299254] ep_poll_callback+0x33/0x230
[179914.299261] __wake_up_common+0x7a/0x190
[179914.299266] __wake_up_common_lock+0x77/0xb0
[179914.299271] uio_event_notify+0x28/0x40 [uio]
[179914.299277] igbuio_pci_irqhandler+0x1a/0x40 [igb_uio]
[179914.299282] __handle_irq_event_percpu+0x4c/0x240
[179914.299286] ? cpuidle_enter_state+0xe5/0x470
[179914.299291] handle_irq_event_percpu+0x55/0xa0
[179914.299294] ? _raw_spin_unlock_irqrestore+0x20/0x60
[179914.299299] handle_irq_event+0x58/0x9d
[179914.299301] handle_edge_irq+0xb3/0x250
[179914.299304] handle_irq+0x1f/0x30
[179914.299310] do_IRQ+0x79/0x130
[179914.299313] common_interrupt+0xf/0xf
[179914.299317] </IRQ>
- Here is the respective source code.
file drivers/uio/uio.c:
423 /**
424 * uio_event_notify - trigger an interrupt event
425 * @info: UIO device capabilities
426 */
427 void uio_event_notify(struct uio_info *info)
428 {
429 struct uio_device *idev = info->uio_dev;
430
431 atomic_inc(&idev->event);
432 wake_up_interruptible(&idev->wait); <<< MAY SLEEP HERE
433 kill_fasync(&idev->async_queue, SIGIO, POLL_IN);
434 }
435 EXPORT_SYMBOL_GPL(uio_event_notify);
227 #define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
file kernel/sched/wait.c:
155 void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
156 int nr_exclusive, void *key)
157 {
158 __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
159 }
160 EXPORT_SYMBOL(__wake_up);
file kernel/sched/wait.c:
122 static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
123 int nr_exclusive, int wake_flags, void *key)
124 {
125 unsigned long flags;
126 wait_queue_entry_t bookmark;
127
128 bookmark.flags = 0;
129 bookmark.private = NULL;
130 bookmark.func = NULL;
131 INIT_LIST_HEAD(&bookmark.entry);
132
133 spin_lock_irqsave(&wq_head->lock, flags);
134 nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key, &bookmark);
135 spin_unlock_irqrestore(&wq_head->lock, flags);
136
137 while (bookmark.flags & WQ_FLAG_BOOKMARK) {
138 spin_lock_irqsave(&wq_head->lock, flags);
139 nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
140 wake_flags, key, &bookmark);
141 spin_unlock_irqrestore(&wq_head->lock, flags);
142 }
143 }
144
file kernel/sched/wait.c:
80 static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
81 int nr_exclusive, int wake_flags, void *key,
82 wait_queue_entry_t *bookmark)
83 {
84 wait_queue_entry_t *curr, *next;
85 int cnt = 0;
86
(...)
98 list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
99 unsigned flags = curr->flags;
100 int ret;
101
102 if (flags & WQ_FLAG_BOOKMARK)
103 continue;
104
105 ret = curr->func(curr, mode, wake_flags, key); <<<<<< MAY CALL HERE A FUNCTION WHICH MAY SLEEP
106 if (ret < 0)
107 break;
108 if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
109 break;
110
111 if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) &&
112 (&next->entry != &wq_head->head)) {
113 bookmark->flags = WQ_FLAG_BOOKMARK;
114 list_add_tail(&bookmark->entry, &next->entry);
115 break;
116 }
117 }
118
119 return nr_exclusive;
120 }
1121 static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
1122 {
1123 int pwake = 0;
1124 unsigned long flags;
1125 struct epitem *epi = ep_item_from_wait(wait);
1126 struct eventpoll *ep = epi->ep;
1127 __poll_t pollflags = key_to_poll(key);
1128 int ewake = 0;
1129
1130 spin_lock_irqsave(&ep->lock, flags); <<<<< KERNEL TRIED TO RESCHEDULE ON THIS SPINLOCK AND REPORTED WARNING
1131
1132 ep_set_busy_poll_napi_id(epi);
This solution is part of Red Hat’s fast-track publication program, providing a huge library of solutions that Red Hat engineers have created while supporting our customers. To give you the knowledge you need the instant it becomes available, these articles may be presented in a raw and unedited form.
Comments