Red Hat Training

A Red Hat training course is available for Red Hat Enterprise Linux

4.3.4. Monitoring Polling Applications

This section describes how to identify and monitor which applications are polling. Doing so allows you to track unnecessary or excessive polling, which can help you pinpoint areas for improvement in terms of CPU usage and power savings.
timeout.stp

#! /usr/bin/env stap
# Copyright (C) 2009 Red Hat, Inc.
# Written by Ulrich Drepper <drepper@redhat.com>
# Modified by William Cohen <wcohen@redhat.com>

global process, timeout_count, to
global poll_timeout, epoll_timeout, select_timeout, itimer_timeout
global nanosleep_timeout, futex_timeout, signal_timeout

probe syscall.poll, syscall.epoll_wait {
  if (timeout) to[pid()]=timeout
}

probe syscall.poll.return {
  p = pid()
  if ($return == 0 && to[p] > 0 ) {
    poll_timeout[p]++
    timeout_count[p]++
    process[p] = execname()
    delete to[p]
  }
}

probe syscall.epoll_wait.return {
  p = pid()
  if ($return == 0 && to[p] > 0 ) {
    epoll_timeout[p]++
    timeout_count[p]++
    process[p] = execname()
    delete to[p]
  }
}

probe syscall.select.return {
  if ($return == 0) {
    p = pid()
    select_timeout[p]++
    timeout_count[p]++
    process[p] = execname()
  }
}

probe syscall.futex.return {
  if (errno_str($return) == "ETIMEDOUT") {
    p = pid()
    futex_timeout[p]++
    timeout_count[p]++
    process[p] = execname()
  }
}

probe syscall.nanosleep.return {
  if ($return == 0) {
    p = pid()
    nanosleep_timeout[p]++
    timeout_count[p]++
    process[p] = execname()
  }
}

probe kernel.function("it_real_fn") {
  p = pid()
  itimer_timeout[p]++
  timeout_count[p]++
  process[p] = execname()
}

probe syscall.rt_sigtimedwait.return {
  if (errno_str($return) == "EAGAIN") {
    p = pid()
    signal_timeout[p]++
    timeout_count[p]++
    process[p] = execname()
  }
}

probe syscall.exit {
  p = pid()
  if (p in process) {
    delete process[p]
    delete timeout_count[p]
    delete poll_timeout[p]
    delete epoll_timeout[p]
    delete select_timeout[p]
    delete itimer_timeout[p]
    delete futex_timeout[p]
    delete nanosleep_timeout[p]
    delete signal_timeout[p]
  }
}

probe timer.s(1) {
  ansi_clear_screen()
  printf ("  pid |   poll  select   epoll  itimer   futex nanosle  signal| process\n")
  foreach (p in timeout_count- limit 20) {
     printf ("%5d |%7d %7d %7d %7d %7d %7d %7d| %-.38s\n", p,
              poll_timeout[p], select_timeout[p],
              epoll_timeout[p], itimer_timeout[p],
              futex_timeout[p], nanosleep_timeout[p],
              signal_timeout[p], process[p])
  }
}

timeout.stp tracks how many times each application used the following system calls over time:
  • poll
  • select
  • epoll
  • itimer
  • futex
  • nanosleep
  • signal
In some applications, these system calls are used excessively. As such, they are normally identified as "likely culprits" for polling applications. Note, however, that an application may be using a different system call to poll excessively; sometimes, it is useful to find out the top system calls used by the system (refer to Section 4.3.5, “Tracking Most Frequently Used System Calls” for instructions). Doing so can help you identify any additional suspects, which you can add to timeout.stp for tracking.

Example 4.14. timeout.stp Sample Output

  uid |   poll  select   epoll  itimer   futex nanosle  signal| process
28937 | 148793       0       0    4727   37288       0       0| firefox
22945 |      0   56949       0       1       0       0       0| scim-bridge
    0 |      0       0       0   36414       0       0       0| swapper
 4275 |  23140       0       0       1       0       0       0| mixer_applet2
 4191 |      0   14405       0       0       0       0       0| scim-launcher
22941 |   7908       1       0      62       0       0       0| gnome-terminal
 4261 |      0       0       0       2       0    7622       0| escd
 3695 |      0       0       0       0       0    7622       0| gdm-binary
 3483 |      0    7206       0       0       0       0       0| dhcdbd
 4189 |   6916       0       0       2       0       0       0| scim-panel-gtk
 1863 |   5767       0       0       0       0       0       0| iscsid
 2562 |      0    2881       0       1       0    1438       0| pcscd
 4257 |   4255       0       0       1       0       0       0| gnome-power-man
 4278 |   3876       0       0      60       0       0       0| multiload-apple
 4083 |      0    1331       0    1728       0       0       0| Xorg
 3921 |   1603       0       0       0       0       0       0| gam_server
 4248 |   1591       0       0       0       0       0       0| nm-applet
 3165 |      0    1441       0       0       0       0       0| xterm
29548 |      0    1440       0       0       0       0       0| httpd
 1862 |      0       0       0       0       0    1438       0| iscsid
You can increase the sample time by editing the timer in the second probe (timer.s()). The output of functioncallcount.stp contains the name and UID of the top 20 polling applications, along with how many times each application performed each polling system call (over time). Example 4.14, “timeout.stp Sample Output” contains an excerpt of the script: