Host compute3 shows high memory usage
Issue
- One of our compute nodes consumes almost all of its available RAM even though only half of it is actually allocated to VMs:
[root@overcloud-compute-0 ~]# free -m
total used free shared buff/cache available
Mem: 515698 401225 103072 21 11401 113049
Swap: 0 0 0
- On this compute node, there are 54 VMs, and the following amount of RAM is allocated to them through nova:
# grep -r memory * | grep -v unit | awk -F\> '{ print $2 }' | sed -e 's/<.*//' | awk 'BEGIN {a=0}{a+=$1}END{ print "total " a "MB"}'
total 256000 MB
- There is currently about 424 GB of RAM being actively consumed:
# ps aufxg | awk 'BEGIN {a=0}{a+=$6}END{ print "total " a "KB"}'
total 424905412KB
- The following processes are the top 10 RAM consumers:
root 713002 2.6 2.7 14717928 14673084 ? S Apr10 4831:07 /usr/lib/systemd/systemd-udevd
qemu 435663 0.4 3.1 17435412 16386548 ? Sl Apr10 843:39 /usr/libexec/qemu-kvm -name guest=instance-00000405,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-26-instance-00000405/master-key.aes -machine pc-i440fx-rhel7.6.0,accel=kvm,usb=off,dump-guest-core=off -cpu Broadwell-IBRS,vme=on,ss=on,f16c=on,rdrand=on,hypervisor=on,arat=on,tsc_adjust=on,md-clear=on,stibp=on,ssbd=on,xsaveopt=on,pdpe1gb=on,abm=on -m 16384 -realtime mlock=off -smp 4,sockets=4,cores=1,threads=1 -uuid 881b75ee-f33f-4fab-9c9c-83ba11e01317 -smbios type=1,manufacturer=Red Hat,product=OpenStack Compute,version=17.0.10-6.el7ost,serial=66e686ec-7c4d-1000-95bf-54ab3a591010,uuid=881b75ee-f33f-4fab-9c9c-83ba11e01317,family=Virtual Machine -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=103,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-hpet -no-shutdown -boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive file=/dev/disk/by-id/dm-uuid-mpath-360060e8012314f005040314f00000374,format=raw,if=none,id=drive-virtio-disk0,serial=44d58b5c-5b44-4f47-ac2a-96aad09ef653,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on -drive file=/dev/disk/by-id/dm-uuid-mpath-360060e8012314f005040314f0000038e,format=raw,if=none,id=drive-virtio-disk1,serial=c9e66ba0-9306-4467-8ba0-87a7d3c83ba1,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x6,drive=drive-virtio-disk1,id=virtio-disk1,write-cache=on -drive file=/dev/disk/by-id/dm-uuid-mpath-360060e8012314f005040314f000003a4,format=raw,if=none,id=drive-virtio-disk2,serial=13bac374-fb26-4c43-939b-d04367355b37,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x7,drive=drive-virtio-disk2,id=virtio-disk2,write-cache=on -drive 
file=/dev/disk/by-id/dm-uuid-mpath-360060e8012314f005040314f000003a9,format=raw,if=none,id=drive-virtio-disk3,serial=439f9efe-f238-4565-b4c0-06b3917a2403,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x8,drive=drive-virtio-disk3,id=virtio-disk3,write-cache=on -drive file=/dev/disk/by-id/dm-uuid-mpath-360060e8012314f005040314f000003e1,format=raw,if=none,id=drive-virtio-disk4,serial=cbce97ba-4c80-433c-9ef7-d1ac69798cf8,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x9,drive=drive-virtio-disk4,id=virtio-disk4,write-cache=on -drive file=/dev/disk/by-id/dm-uuid-mpath-360060e8012314f005040314f0000043c,format=raw,if=none,id=drive-virtio-disk5,serial=6e930d27-fbd9-4462-a8d2-16b202b0bc11,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0xa,drive=drive-virtio-disk5,id=virtio-disk5,write-cache=on -drive file=/dev/disk/by-id/dm-uuid-mpath-360060e8012314f005040314f0000043b,format=raw,if=none,id=drive-virtio-disk6,serial=3aaa7110-6f8f-46ec-94fa-89f646aefac1,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0xb,drive=drive-virtio-disk6,id=virtio-disk6,write-cache=on -netdev tap,fd=105,id=hostnet0,vhost=on,vhostfd=106 -device virtio-net-pci,rx_queue_size=512,host_mtu=1450,netdev=hostnet0,id=net0,mac=fa:16:3e:61:f7:59,bus=pci.0,addr=0x3 -netdev tap,fd=107,id=hostnet1,vhost=on,vhostfd=108 -device virtio-net-pci,rx_queue_size=512,host_mtu=1450,netdev=hostnet1,id=net1,mac=fa:16:3e:53:47:de,bus=pci.0,addr=0x4 -add-fd set=5,fd=110 -chardev pty,id=charserial0,logfile=/dev/fdset/5,logappend=on -device isa-serial,chardev=charserial0,id=serial0 -device usb-tablet,id=input0,bus=usb.0,port=1 -vnc 10.154.155.19:24 -k en-us -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -incoming defer -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0xc -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on
root 1079 84.7 3.4 18118556 18075100 ? Rs Apr10 155703:57 /usr/lib/systemd/systemd-udevd
root 700480 10.3 3.7 19796620 19751788 ? S Apr10 18958:15 /usr/lib/systemd/systemd-udevd
root 700482 9.3 4.8 25816260 25771432 ? S Apr10 17159:23 /usr/lib/systemd/systemd-udevd
root 700488 8.3 4.9 26139852 26095060 ? S Apr10 15277:56 /usr/lib/systemd/systemd-udevd
root 700470 22.1 5.0 26922556 26877736 ? S Apr10 40592:53 /usr/lib/systemd/systemd-udevd
qemu 878663 120 6.3 34330504 33631968 ? Sl Apr10 220701:12 /usr/libexec/qemu-kvm -name guest=instance-00000f3e,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-63-instance-00000f3e/master-key.aes -machine pc-i440fx-rhel7.6.0,accel=kvm,usb=off,dump-guest-core=off -cpu Broadwell-IBRS,vme=on,ss=on,f16c=on,rdrand=on,hypervisor=on,arat=on,tsc_adjust=on,md-clear=on,stibp=on,ssbd=on,xsaveopt=on,pdpe1gb=on,abm=on -m 32768 -realtime mlock=off -smp 8,sockets=8,cores=1,threads=1 -uuid 65fc64b8-6024-4b99-bec2-59745ebd3e00 -smbios type=1,manufacturer=Red Hat,product=OpenStack Compute,version=17.0.10-6.el7ost,serial=f606d278-7c7e-1000-be7a-54ab3aee2cd1,uuid=65fc64b8-6024-4b99-bec2-59745ebd3e00,family=Virtual Machine -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=139,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-hpet -no-shutdown -boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive file=/dev/disk/by-id/dm-uuid-mpath-360060e8012314f005040314f00000349,format=raw,if=none,id=drive-virtio-disk0,serial=d41cd1d8-0e1b-47ea-8f64-ab54a6387e60,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on -netdev tap,fd=141,id=hostnet0,vhost=on,vhostfd=142 -device virtio-net-pci,rx_queue_size=512,host_mtu=9000,netdev=hostnet0,id=net0,mac=fa:16:3e:5a:f1:68,bus=pci.0,addr=0x3 -add-fd set=3,fd=144 -chardev pty,id=charserial0,logfile=/dev/fdset/3,logappend=on -device isa-serial,chardev=charserial0,id=serial0 -device usb-tablet,id=input0,bus=usb.0,port=1 -vnc 10.154.155.19:60 -k en-us -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -incoming defer -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on
qemu 946592 15.5 6.3 34514176 33633264 ? Sl Apr10 28433:41 /usr/libexec/qemu-kvm -name guest=instance-00000d6a,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-68-instance-00000d6a/master-key.aes -machine pc-i440fx-rhel7.6.0,accel=kvm,usb=off,dump-guest-core=off -cpu Broadwell-IBRS,vme=on,ss=on,f16c=on,rdrand=on,hypervisor=on,arat=on,tsc_adjust=on,md-clear=on,stibp=on,ssbd=on,xsaveopt=on,pdpe1gb=on,abm=on -m 32768 -realtime mlock=off -smp 32,sockets=32,cores=1,threads=1 -uuid 3b0aae79-acbf-493f-8bba-b34dd5c5af01 -smbios type=1,manufacturer=Red Hat,product=OpenStack Compute,version=17.0.10-6.el7ost,serial=f606d278-7c7e-1000-be7a-54ab3aee2cd1,uuid=3b0aae79-acbf-493f-8bba-b34dd5c5af01,family=Virtual Machine -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=144,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-hpet -no-shutdown -boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive file=/dev/disk/by-id/dm-uuid-mpath-360060e8012314f005040314f000003d5,format=raw,if=none,id=drive-virtio-disk0,serial=24d09b0b-406c-4cdd-9421-4a864a946956,cache=none,aio=native -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x4,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1,write-cache=on -netdev tap,fd=146,id=hostnet0,vhost=on,vhostfd=147 -device virtio-net-pci,rx_queue_size=512,host_mtu=1450,netdev=hostnet0,id=net0,mac=fa:16:3e:97:9d:50,bus=pci.0,addr=0x3 -add-fd set=3,fd=149 -chardev pty,id=charserial0,logfile=/dev/fdset/3,logappend=on -device isa-serial,chardev=charserial0,id=serial0 -device usb-tablet,id=input0,bus=usb.0,port=1 -vnc 10.154.155.19:65 -k en-us -device cirrus-vga,id=video0,bus=pci.0,addr=0x2 -incoming defer -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on
root 700458 25.1 12.2 64589408 64544588 ? R Apr10 46114:41 /usr/lib/systemd/systemd-udevd
- We notice that systemd-udevd is consuming a lot of RAM across several different processes, which appears to be abnormal behavior at this time.
- Some paths are down:
Aug 14 03:22:38 overcloud-compute-0 multipathd: 360060e8012314f005040314f0000037e: sdagb - tur checker reports path is down
- Many paths are reported as failed in `multipath -ll`:
# multipath -ll | grep faulty
|- 11:0:1:108 sdagf 69:1008 failed faulty running
|- 11:0:3:108 sdagg 70:768 failed faulty running
|- 11:0:0:108 sdagh 70:784 failed faulty running
|- 11:0:2:108 sdagi 70:800 failed faulty running
|- 12:0:0:108 sdagj 70:816 failed faulty running
|- 12:0:2:108 sdagk 70:832 failed faulty running
|- 12:0:1:108 sdagl 70:848 failed faulty running
`- 12:0:3:108 sdagm 70:864 failed faulty running
|- 11:0:1:93 sdacn 135:752 failed ready running
|- 11:0:3:93 sdaco 8:768 failed ready running
|- 11:0:0:93 sdacp 8:784 failed ready running
|- 11:0:2:93 sdacq 8:800 failed ready running
|- 12:0:0:93 sdacr 8:816 failed ready running
|- 12:0:2:93 sdacs 8:832 failed ready running
|- 12:0:1:93 sdact 8:848 failed ready running
`- 12:0:3:93 sdacu 8:864 failed ready running
|- 11:0:1:0 sdqv 132:496 failed faulty running
|- 11:0:3:0 sdqw 133:256 failed faulty running
|- 11:0:0:0 sdqx 133:272 failed faulty running
|- 11:0:2:0 sdqy 133:288 failed faulty running
|- 12:0:0:0 sdqz 133:304 failed faulty running
|- 12:0:2:0 sdra 133:320 failed faulty running
|- 12:0:1:0 sdrb 133:336 failed faulty running
`- 12:0:3:0 sdrc 133:352 failed faulty running
|- 11:0:1:107 sdafx 69:880 failed faulty running
|- 11:0:3:107 sdafy 69:896 failed faulty running
|- 11:0:0:107 sdafz 69:912 failed faulty running
|- 11:0:2:107 sdaga 69:928 failed faulty running
|- 12:0:0:107 sdagb 69:944 failed faulty running
|- 12:0:2:107 sdagc 69:960 failed faulty running
|- 12:0:1:107 sdagd 69:976 failed faulty running
`- 12:0:3:107 sdage 69:992 failed faulty running
|- 11:0:1:106 sdafp 68:1008 failed faulty running
|- 11:0:3:106 sdafq 69:768 failed faulty running
|- 11:0:0:106 sdafr 69:784 failed faulty running
|- 11:0:2:106 sdafs 69:800 failed faulty running
|- 12:0:0:106 sdaft 69:816 failed faulty running
|- 12:0:2:106 sdafu 69:832 failed faulty running
|- 12:0:1:106 sdafv 69:848 failed faulty running
`- 12:0:3:106 sdafw 69:864 failed faulty running
`pvs` seems unable to read many devices:
Error reading device /dev/sdwy15 at 4096 length 4.
Error reading device /dev/sdadd at 0 length 512.
Error reading device /dev/sdadd at 0 length 4.
Error reading device /dev/sdadd at 4096 length 4.
Error reading device /dev/sdaej at 0 length 512.
Error reading device /dev/sdaej at 0 length 4.
Error reading device /dev/sdaej at 4096 length 4.
Error reading device /dev/sdaof at 0 length 512.
Error reading device /dev/sdaof at 0 length 4.
Error reading device /dev/sdaof at 4096 length 4.
Error reading device /dev/sdft at 0 length 512.
Error reading device /dev/sdft at 0 length 4.
Error reading device /dev/sdft at 4096 length 4.
Error reading device /dev/sdamj at 0 length 512.
Error reading device /dev/sdamj at 0 length 4.
Error reading device /dev/sdamj at 4096 length 4.
Error reading device /dev/sdadd1 at 0 length 4.
Error reading device /dev/sdadd1 at 4096 length 4.
Error reading device /dev/sdaej1 at 0 length 4.
Error reading device /dev/sdaej1 at 4096 length 4.
Error reading device /dev/sdaof1 at 0 length 4.
Error reading device /dev/sdaof1 at 4096 length 4.
Error reading device /dev/sdft1 at 0 length 4.
Error reading device /dev/sdft1 at 4096 length 4.
Environment
- Red Hat OpenStack Platform 13.0 (RHOSP)
Subscriber exclusive content
A Red Hat subscription provides unlimited access to our knowledgebase, tools, and much more.