WARNING: at drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c:399 vmw_cmdbuf_ctx_process+0x258/0x280 [vmwgfx]

Solution Verified - Updated -

Environment

  • Red Hat Enterprise Linux 8
  • Red Hat Enterprise Linux 9
  • VMware

Issue

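  • An insufficient amount of graphics memory causes a mob memory overflow in the vmwgfx driver, which is followed by a "Command buffer error" kernel warning from vmw_cmdbuf_ctx_process().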

Resolution

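  • Engage VMware, the hypervisor vendor, to determine how much video memory the guest requires, and increase it. As the driver's own warning suggests, consider increasing guest RAM and graphicsMemory.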

Root Cause

The "increasing guest mob limits" message comes from ./drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c:

    52  static int vmw_gmrid_man_get_node(struct ttm_resource_manager *man,
    53                                    struct ttm_buffer_object *bo,
    54                                    const struct ttm_place *place,
    55                                    struct ttm_resource **res)
    56  {
    57          struct vmwgfx_gmrid_man *gman = to_gmrid_manager(man);
    58          int id;
    59  
    60          *res = kmalloc(sizeof(**res), GFP_KERNEL);
    61          if (!*res)
    62                  return -ENOMEM;
    63  
    64          ttm_resource_init(bo, place, *res);
    65  
    66          id = ida_alloc_max(&gman->gmr_ida, gman->max_gmr_ids - 1, GFP_KERNEL);
    67          if (id < 0)
    68                  return id;
    69  
    70          spin_lock(&gman->lock);
    71  
    72          if (gman->max_gmr_pages > 0) {
    73                  gman->used_gmr_pages += PFN_UP((*res)->size);
    74                  /*
    75                   * Because the graphics memory is a soft limit we can try to
    76                   * expand it instead of letting the userspace apps crash.
    77                   * We're just going to have a sane limit (half of RAM)
    78                   * on the number of MOB's that we create and will try to keep
    79                   * the system running until we reach that.
    80                   */
    81                  if (unlikely(gman->used_gmr_pages > gman->max_gmr_pages)) {
    82                          const unsigned long max_graphics_pages = totalram_pages() / 2;
    83                          uint32_t new_max_pages = 0;
    84  
    85                          DRM_WARN("vmwgfx: mob memory overflow. Consider increasing guest RAM and graphicsMemory.\n");
    86                          vmw_host_printf("vmwgfx, warning: mob memory overflow. Consider increasing guest RAM and graphicsMemory.\n");
    87  
    88                          if (gman->max_gmr_pages > (max_graphics_pages / 2)) {
    89                                  DRM_WARN("vmwgfx: guest requires more than half of RAM for graphics.\n");
    90                                  new_max_pages = max_graphics_pages;
    91                          } else
    92                                  new_max_pages = gman->max_gmr_pages * 2;
    93                          if (new_max_pages > gman->max_gmr_pages && new_max_pages >= gman->used_gmr_pages) {
    94                                  DRM_WARN("vmwgfx: increasing guest mob limits to %u kB.\n",          <<-------
    95                                           ((new_max_pages) << (PAGE_SHIFT - 10)));
    96  
    97                                  gman->max_gmr_pages = new_max_pages;
    98                          } else {
    99                                  char buf[256];
   100                                  snprintf(buf, sizeof(buf),
   101                                           "vmwgfx, error: guest graphics is out of memory (mob limit at: %ukB).\n",
   102                                           ((gman->max_gmr_pages) << (PAGE_SHIFT - 10)));
   103                                  vmw_host_printf(buf);
   104                                  DRM_WARN("%s", buf);
   105                                  goto nospace;
   106                          }
   107                  }
   108          }
   109  
   110          (*res)->start = id;
   111  
   112          spin_unlock(&gman->lock);
   113          return 0;
   114  
   115  nospace:
   116          gman->used_gmr_pages -= PFN_UP((*res)->size);
   117          spin_unlock(&gman->lock);
   118          ida_free(&gman->gmr_ida, id);
   119          ttm_resource_fini(man, *res);
   120          kfree(*res);
   121          return -ENOSPC;
   122  }

The "Command buffer error" warning comes from ./drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c:

   366  /**
   367   * vmw_cmdbuf_ctx_process - Process a command buffer context.
   368   *
   369   * @man: The command buffer manager.
   370   * @ctx: The command buffer context.
   371   * @notempty: Pass back count of non-empty command submitted lists.
   372   *
   373   * Submit command buffers to hardware if possible, and process finished
   374   * buffers. Typically freeing them, but on preemption or error take
   375   * appropriate action. Wake up waiters if appropriate.
   376   */
   377  static void vmw_cmdbuf_ctx_process(struct vmw_cmdbuf_man *man,
   378                                     struct vmw_cmdbuf_context *ctx,
   379                                     int *notempty)
   380  {
   381          struct vmw_cmdbuf_header *entry, *next;
   382  
   383          vmw_cmdbuf_ctx_submit(man, ctx);
   384  
   385          list_for_each_entry_safe(entry, next, &ctx->hw_submitted, list) {
   386                  SVGACBStatus status = entry->cb_header->status;
   387  
   388                  if (status == SVGA_CB_STATUS_NONE)
   389                          break;
   390  
   391                  list_del(&entry->list);
   392                  wake_up_all(&man->idle_queue);
   393                  ctx->num_hw_submitted--;
   394                  switch (status) {
   395                  case SVGA_CB_STATUS_COMPLETED:
   396                          __vmw_cmdbuf_header_free(entry);
   397                          break;
   398                  case SVGA_CB_STATUS_COMMAND_ERROR:
   399                          WARN_ONCE(true, "Command buffer error.\n");              <<<<-----
   400                          entry->cb_header->status = SVGA_CB_STATUS_NONE;
   401                          list_add_tail(&entry->list, &man->error);
   402                          schedule_work(&man->work);
   403                          break;
   404                  case SVGA_CB_STATUS_PREEMPTED:
   405                          entry->cb_header->status = SVGA_CB_STATUS_NONE;
   406                          list_add_tail(&entry->list, &ctx->preempted);
   407                          break;
   408                  case SVGA_CB_STATUS_CB_HEADER_ERROR:
   409                          WARN_ONCE(true, "Command buffer header error.\n");
   410                          __vmw_cmdbuf_header_free(entry);
   411                          break;
   412                  default:
   413                          WARN_ONCE(true, "Undefined command buffer status.\n");
   414                          __vmw_cmdbuf_header_free(entry);
   415                          break;
   416                  }
   417          }
   418  
   419          vmw_cmdbuf_ctx_submit(man, ctx);
   420          if (!list_empty(&ctx->submitted))
   421                  (*notempty)++;
   422  }

Diagnostic Steps

  • The warning appears intermittently in the kernel log, immediately after the mob memory overflow messages:
[326360.734253] [drm] vmwgfx: mob memory overflow. Consider increasing guest RAM and graphicsMemory.
[326360.734770] [drm] vmwgfx: increasing guest mob limits to 49152 kB.
[326360.735028] ------------[ cut here ]------------
[326360.735030] Command buffer error.
[326360.735059] WARNING: CPU: 1 PID: 488 at drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c:399 vmw_cmdbuf_ctx_process+0x258/0x280 [vmwgfx]
[326360.735076] Modules linked in: nf_tables nfnetlink rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache cfg80211 rfkill vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vsock sunrpc crct10dif_pclmul crc32_pclmul vmw_balloon ghash_clmulni_intel joydev pcspkr vmw_vmci i2c_piix4 xfs libcrc32c sr_mod cdrom ata_generic sd_mod t10_pi sg vmwgfx drm_ttm_helper ttm drm_kms_helper crc32c_intel syscopyarea sysfillrect sysimgblt serio_raw drm ata_piix libata vmw_pvscsi e1000 dm_mod fuse
[326360.735101] CPU: 1 PID: 488 Comm: irq/16-vmwgfx Kdump: loaded Not tainted 4.18.0-553.5.1.el8_10.x86_64 #1
[326360.735103] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020
[326360.735104] RIP: 0010:vmw_cmdbuf_ctx_process+0x258/0x280 [vmwgfx]
[326360.735110] Code: 38 83 00 01 48 83 c4 40 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc 48 c7 c7 ad 48 36 c0 c6 05 0a 27 03 00 01 e8 e5 53 1b ee <0f> 0b e9 53 ff ff ff 48 c7 c7 c4 48 36 c0 c6 05 ef 26 03 00 01 e8
[326360.735112] RSP: 0000:ffffb86a43c87dc8 EFLAGS: 00010282
[326360.735114] RAX: 0000000000000000 RBX: ffff8a9373e99c18 RCX: 0000000000000027
[326360.735115] RDX: 0000000000000027 RSI: 00000000ffff7fff RDI: ffff8a972bc9e690
[326360.735115] RBP: ffff8a9321dac8a8 R08: 0000000000000000 R09: c0000000ffff7fff
[326360.735116] R10: 0000000000000001 R11: ffffb86a43c87be0 R12: ffff8a9321dac8a0
[326360.735117] R13: 0000000000000003 R14: dead000000000100 R15: ffff8a9373e99c00
[326360.735118] FS:  0000000000000000(0000) GS:ffff8a972bc80000(0000) knlGS:0000000000000000
[326360.735119] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[326360.735120] CR2: 00007f54374ea200 CR3: 000000012fac6000 CR4: 00000000000006e0
[326360.735134] Call Trace:
[326360.735137]  ? __warn+0x94/0xe0
[326360.735141]  ? vmw_cmdbuf_ctx_process+0x258/0x280 [vmwgfx]
[326360.735147]  ? vmw_cmdbuf_ctx_process+0x258/0x280 [vmwgfx]
[326360.735152]  ? report_bug+0xb1/0xe0
[326360.735156]  ? do_error_trap+0x9e/0xd0
[326360.735159]  ? do_invalid_op+0x36/0x40
[326360.735160]  ? vmw_cmdbuf_ctx_process+0x258/0x280 [vmwgfx]
[326360.735165]  ? invalid_op+0x14/0x20
[326360.735169]  ? vmw_cmdbuf_ctx_process+0x258/0x280 [vmwgfx]
[326360.735174]  ? vmw_cmdbuf_ctx_process+0x258/0x280 [vmwgfx]
[326360.735179]  ? irq_finalize_oneshot.part.48+0xf0/0xf0
[326360.735184]  vmw_cmdbuf_man_process+0x59/0x100 [vmwgfx]
[326360.735189]  vmw_cmdbuf_irqthread+0x21/0x40 [vmwgfx]
[326360.735194]  vmw_thread_fn+0x36/0x70 [vmwgfx]
[326360.735201]  irq_thread_fn+0x1f/0x60
[326360.735219]  irq_thread+0x100/0x190
[326360.735222]  ? irq_forced_thread_fn+0x70/0x70
[326360.735223]  ? irq_thread_check_affinity+0xf0/0xf0
[326360.735225]  kthread+0x134/0x150
[326360.735228]  ? set_kthread_struct+0x50/0x50
[326360.735230]  ret_from_fork+0x35/0x40
[326360.735233] ---[ end trace e951607802a68d63 ]---

This solution is part of Red Hat’s fast-track publication program, providing a huge library of solutions that Red Hat engineers have created while supporting our customers. To give you the knowledge you need the instant it becomes available, these articles may be presented in a raw and unedited form.

Comments