udevd continues to trigger on not ready (NR) devices making dmesg usage unusable for troubleshooting
Issue
udevd
continues to trigger on not ready (NR) devices makingdmesg
usage unusable for troubleshooting.
Note: a RHEL5 box hooked to the same Symmetrix
with visibility to the same devices does not exhibit this behavior.
Because of the this behavior admins can not make use of dmesg
for troubleshooting as the error handling floods the kernel ring buffer
With removal of powerpath
and dm-multipath
the issue still occurs.
To prove that udevd
seems to be the initiator we used the following systemtap
script.
Based on the user knowing a device that aligns to a not ready lun
we watched vfs.read/vfs.write
for this device
#! /usr/bin/env stap
global disk_major, disk_minor, diskname, rq_timeout, rq_retries, rq_deadline, dev_id, device_of_interest
probe begin {
printf("Enabling Probes.\n");
}
probe kernel.statement("blk_update_request@blk-core.c:2225")
{
#printf("%-s %-s %-s\n", "COMMAND", "PID", "TASK_STRUCT")
#printf("%-s %-d %-p\n", kernel_string($t->comm), $t->pid, $t)
req_addr = @choose_defined($req, $rq);
if(@cast(req_addr,"request")->rq_disk == 0) {
disk_major = -1
disk_minor = -1
} else {
disk_major = @cast(req_addr,"request")->rq_disk->major
disk_minor = @cast(req_addr,"request")->rq_disk->first_minor
diskname = @cast(req_addr,"request")->rq_disk->disk_name
rq_timeout = @cast(req_addr,"request")->timeout
rq_retries = @cast(req_addr,"request")->retries
rq_deadline = @cast(req_addr,"request")->deadline
dev_id = &@cast(req_addr, "request")->rq_disk->part0->__dev
device_of_interest = MKDEV(disk_major,disk_minor)
}
strdiskname = kernel_string(diskname)
if (strdiskname == "sdew") {
printf(" \n")
printf("blk_update_request diskname %-s\n", kernel_string(diskname))
printf("blk_update_request major minor %-d %-d\n", disk_major, disk_minor)
printf("blk_update_request error %-d\n", $error)
printf("blk_update_request timeout %-d\n", rq_timeout)
printf("blk_update_request retries %-d\n", rq_retries)
printf("blk_update_request deadline %-d\n",rq_deadline)
printf("blk_update_request device %-x\n",dev_id)
printf("blk_update_request device_of_interest %04x\n",device_of_interest)
#device_state = get_devstate_from_req($q)
#device_state_str = describe_device_state(device_state)
#printf("%-s\n", device_state_str)
}
}
probe vfs.write, vfs.read
{
#dev_str = "800010"
dev_str = "8100080"
device_of_interest_str = sprintf("%04x", device_of_interest);
if (dev_str == device_of_interest_str)
printf ("%s(%d) %s 0x%x\n",
execname(), pid(), ppfunc(), dev)
}
probe end {
printf("Disabling Probes.\n");
}
here's a snapshot, note there is a 900 second pause between triggers, seems like some sort of timeout, but the trigger continues over and over.
Note when this happens the end_request errors flow into the kernel ring buffer overrunning, making dmesg
not very usable for troubleshooting.
increasing log_buf_le
n doesn't help
2014-08-21 11:17:54 blk_update_request device_of_interest 8100080
2014-08-21 11:32:54
2014-08-21 11:32:54 blk_update_request diskname sdew
2014-08-21 11:32:54 blk_update_request major minor 129 128
2014-08-21 11:32:54 blk_update_request error 0
2014-08-21 11:32:54 blk_update_request timeout 7000
2014-08-21 11:32:54 blk_update_request retries 0
2014-08-21 11:32:54 blk_update_request deadline 4296644413
2014-08-21 11:32:54 blk_update_request device ffff8817f7afc460
2014-08-21 11:32:54 blk_update_request device_of_interest 8100080
2014-08-21 11:32:54
2014-08-21 11:32:54 blk_update_request diskname sdew
2014-08-21 11:32:54 blk_update_request major minor 129 128
2014-08-21 11:32:54 blk_update_request error 0
2014-08-21 11:32:54 blk_update_request timeout 7000
2014-08-21 11:32:54 blk_update_request retries 0
2014-08-21 11:32:54 blk_update_request deadline 4296644414
2014-08-21 11:32:54 blk_update_request device ffff8817f7afc460
2014-08-21 11:32:54 blk_update_request device_of_interest 8100080
2014-08-21 11:32:54 udevd(8285) vfs_read 0x5
2014-08-21 11:32:54 udevd(8329) vfs_write 0x5
2014-08-21 11:32:54 udevd(8684) vfs_read 0xfd00000
2014-08-21 11:32:54 udevd(8439) vfs_read 0x5
2014-08-21 11:32:54 udevd(8684) vfs_read 0xfd00000
2014-08-21 11:32:54 udevd(8346) vfs_read 0x3
2014-08-21 11:32:54 udevd(8297) vfs_read 0x5
2014-08-21 11:32:54 udevd(8361) vfs_read 0x3
2014-08-21 11:32:54 udevd(8419) vfs_read 0x3
continue end_request filling printk
log bug making dmesg
usage for troubleshooting unavailable
Environment
- Red Hat Enterprise Linux 6.5
Subscriber exclusive content
A Red Hat subscription provides unlimited access to our knowledgebase, tools, and much more.