From 649b995d6f87e0ddd5c36fbf869ea7a258c46380 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 3 Dec 2019 10:13:46 +0000
Subject: [PATCH 05/11] drm/i915/gem: Take runtime-pm wakeref prior to
 unbinding

Some machines require ACPI for runtime resume, and ACPI is quite kmalloc
happy. We cannot perform a kmalloc while holding the vm->mutex, as
allocations may recurse into the shrinker, which itself takes vm->mutex,
and so we must ensure the global runtime-pm is awake prior to unbinding
to avoid the potential lock inversion. The intended ordering is sketched
below, followed by the lockdep report.
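
A minimal sketch of that ordering (illustrative only, not part of the
diff below; "rpm" and "vm" stand in for the runtime-pm struct and the
address space of the object being unbound):

        intel_wakeref_t wakeref;

        /* Wake the device first: runtime resume may call into ACPI and
         * thus kmalloc (fs_reclaim), so it must not run beneath any lock
         * the shrinker also takes.
         */
        wakeref = intel_runtime_pm_get(rpm);

        mutex_lock(&vm->mutex);         /* shrinker-tainted lock */
        /* ... unbind; the device is already awake, so no runtime resume
         * (and no allocation) can happen under vm->mutex ...
         */
        mutex_unlock(&vm->mutex);

        intel_runtime_pm_put(rpm, wakeref);

With the wakeref taken up front, rpm_resume() and its ACPI/kmalloc path
can no longer appear inside a vm->mutex critical section, which breaks
the fs_reclaim <-> vm->mutex cycle reported by lockdep:
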
<4> [57.121748] ======================================================
<4> [57.121750] WARNING: possible circular locking dependency detected
<4> [57.121753] 5.4.0-rc8-CI-CI_DRM_7466+ #1 Tainted: G U
<4> [57.121754] ------------------------------------------------------
<4> [57.121756] i915_pm_rpm/1105 is trying to acquire lock:
<4> [57.121758] ffffffff82263a40 (fs_reclaim){+.+.}, at: fs_reclaim_acquire.part.117+0x0/0x30
<4> [57.121766]
but task is already holding lock:
<4> [57.121768] ffff888475a593c0 (&vm->mutex){+.+.}, at: i915_vma_unbind+0x21/0x50 [i915]
<4> [57.121868]
which lock already depends on the new lock.
<4> [57.121869]
the existing dependency chain (in reverse order) is:
<4> [57.121871]
-> #1 (&vm->mutex){+.+.}:
<4> [57.121951] i915_gem_shrinker_taints_mutex+0xa2/0xd0 [i915]
<4> [57.122028] i915_address_space_init+0xa9/0x170 [i915]
<4> [57.122104] i915_ggtt_init_hw+0x47/0x130 [i915]
<4> [57.122150] i915_driver_probe+0xbb4/0x15f0 [i915]
<4> [57.122197] i915_pci_probe+0x43/0x1c0 [i915]
<4> [57.122202] pci_device_probe+0x9e/0x120
<4> [57.122206] really_probe+0xea/0x420
<4> [57.122209] driver_probe_device+0x10b/0x120
<4> [57.122212] device_driver_attach+0x4a/0x50
<4> [57.122214] __driver_attach+0x97/0x130
<4> [57.122217] bus_for_each_dev+0x74/0xc0
<4> [57.122220] bus_add_driver+0x142/0x220
<4> [57.122222] driver_register+0x56/0xf0
<4> [57.122226] do_one_initcall+0x58/0x2ff
<4> [57.122230] do_init_module+0x56/0x1f8
<4> [57.122233] load_module+0x243e/0x29f0
<4> [57.122236] __do_sys_finit_module+0xe9/0x110
<4> [57.122239] do_syscall_64+0x4f/0x210
<4> [57.122242] entry_SYSCALL_64_after_hwframe+0x49/0xbe
<4> [57.122244]
-> #0 (fs_reclaim){+.+.}:
<4> [57.122249] __lock_acquire+0x1328/0x15d0
<4> [57.122251] lock_acquire+0xa7/0x1c0
<4> [57.122254] fs_reclaim_acquire.part.117+0x24/0x30
<4> [57.122257] __kmalloc+0x48/0x320
<4> [57.122261] acpi_ns_internalize_name+0x44/0x9b
<4> [57.122264] acpi_ns_get_node_unlocked+0x6b/0xd3
<4> [57.122267] acpi_ns_get_node+0x3b/0x50
<4> [57.122271] acpi_get_handle+0x8a/0xb4
<4> [57.122274] acpi_has_method+0x1c/0x40
<4> [57.122278] acpi_pci_set_power_state+0x40/0xe0
<4> [57.122281] pci_platform_power_transition+0x3e/0x90
<4> [57.122284] pci_set_power_state+0x83/0xf0
<4> [57.122287] pci_restore_standard_config+0x22/0x40
<4> [57.122289] pci_pm_runtime_resume+0x23/0xc0
<4> [57.122293] __rpm_callback+0xb1/0x110
<4> [57.122296] rpm_callback+0x1a/0x70
<4> [57.122299] rpm_resume+0x50e/0x790
<4> [57.122302] __pm_runtime_resume+0x42/0x80
<4> [57.122357] __intel_runtime_pm_get+0x15/0x60 [i915]
<4> [57.122435] ggtt_unbind_vma+0x24/0x60 [i915]
<4> [57.122514] __i915_vma_unbind.part.39+0xb5/0x500 [i915]
<4> [57.122593] i915_vma_unbind+0x2d/0x50 [i915]
<4> [57.122668] i915_gem_object_unbind+0x11c/0x260 [i915]
<4> [57.122740] i915_gem_object_set_cache_level+0x32/0x90 [i915]
<4> [57.122810] i915_gem_set_caching_ioctl+0x1f7/0x2f0 [i915]
<4> [57.122815] drm_ioctl_kernel+0xa7/0xf0
<4> [57.122818] drm_ioctl+0x2e1/0x390
<4> [57.122822] do_vfs_ioctl+0xa0/0x6f0
<4> [57.122825] ksys_ioctl+0x35/0x60
<4> [57.122828] __x64_sys_ioctl+0x11/0x20
<4> [57.122830] do_syscall_64+0x4f/0x210
<4> [57.122833] entry_SYSCALL_64_after_hwframe+0x49/0xbe

Closes: https://gitlab.freedesktop.org/drm/intel/issues/711
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191203101347.2836057-1-chris@chris-wilson.co.uk
---
drivers/gpu/drm/i915/i915_gem.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3f07948ea4da..ed2436db5dd2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -119,10 +119,23 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
                            unsigned long flags)
 {
-        struct i915_vma *vma;
+        struct intel_runtime_pm *rpm = &to_i915(obj->base.dev)->runtime_pm;
         LIST_HEAD(still_in_list);
+        intel_wakeref_t wakeref;
+        struct i915_vma *vma;
         int ret = 0;
 
+        if (!atomic_read(&obj->bind_count))
+                return 0;
+
+        /*
+         * As some machines use ACPI to handle runtime-resume callbacks, and
+         * ACPI is quite kmalloc happy, we cannot resume beneath the vm->mutex
+         * as they are required by the shrinker. Ergo, we wake the device up
+         * first just in case.
+         */
+        wakeref = intel_runtime_pm_get(rpm);
+
         spin_lock(&obj->vma.lock);
         while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
                                                        struct i915_vma,
@@ -146,6 +159,8 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
         list_splice(&still_in_list, &obj->vma.list);
         spin_unlock(&obj->vma.lock);
 
+        intel_runtime_pm_put(rpm, wakeref);
+
         return ret;
 }
 
--
2.25.2