1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
|
From e268337dfe26dfc7efd422a804dbb27977a3cccc Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 17 Jan 2012 15:21:19 -0800
Subject: proc: clean up and fix /proc/<pid>/mem handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
From: Linus Torvalds <torvalds@linux-foundation.org>
commit e268337dfe26dfc7efd422a804dbb27977a3cccc upstream.
Jüri Aedla reported that the /proc/<pid>/mem handling really isn't very
robust, and it also doesn't match the permission checking of any of the
other related files.
This changes it to do the permission checks at open time, and instead of
tracking the process, it tracks the VM at the time of the open. That
simplifies the code a lot, but does mean that if you hold the file
descriptor open over an execve(), you'll continue to read from the _old_
VM.
That is different from our previous behavior, but much simpler. If
somebody actually finds a load where this matters, we'll need to revert
this commit.
I suspect that nobody will ever notice - because the process mapping
addresses will also have changed as part of the execve. So you cannot
actually usefully access the fd across a VM change simply because all
the offsets for IO would have changed too.
Reported-by: Jüri Aedla <asd@ut.ee>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
fs/proc/base.c | 145 +++++++++++++++------------------------------------------
1 file changed, 39 insertions(+), 106 deletions(-)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -194,65 +194,7 @@ static int proc_root_link(struct inode *
return result;
}
-static struct mm_struct *__check_mem_permission(struct task_struct *task)
-{
- struct mm_struct *mm;
-
- mm = get_task_mm(task);
- if (!mm)
- return ERR_PTR(-EINVAL);
-
- /*
- * A task can always look at itself, in case it chooses
- * to use system calls instead of load instructions.
- */
- if (task == current)
- return mm;
-
- /*
- * If current is actively ptrace'ing, and would also be
- * permitted to freshly attach with ptrace now, permit it.
- */
- if (task_is_stopped_or_traced(task)) {
- int match;
- rcu_read_lock();
- match = (tracehook_tracer_task(task) == current);
- rcu_read_unlock();
- if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
- return mm;
- }
-
- /*
- * No one else is allowed.
- */
- mmput(mm);
- return ERR_PTR(-EPERM);
-}
-
-/*
- * If current may access user memory in @task return a reference to the
- * corresponding mm, otherwise ERR_PTR.
- */
-static struct mm_struct *check_mem_permission(struct task_struct *task)
-{
- struct mm_struct *mm;
- int err;
-
- /*
- * Avoid racing if task exec's as we might get a new mm but validate
- * against old credentials.
- */
- err = mutex_lock_killable(&task->signal->cred_guard_mutex);
- if (err)
- return ERR_PTR(err);
-
- mm = __check_mem_permission(task);
- mutex_unlock(&task->signal->cred_guard_mutex);
-
- return mm;
-}
-
-struct mm_struct *mm_for_maps(struct task_struct *task)
+static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
{
struct mm_struct *mm;
int err;
@@ -263,7 +205,7 @@ struct mm_struct *mm_for_maps(struct tas
mm = get_task_mm(task);
if (mm && mm != current->mm &&
- !ptrace_may_access(task, PTRACE_MODE_READ)) {
+ !ptrace_may_access(task, mode)) {
mmput(mm);
mm = ERR_PTR(-EACCES);
}
@@ -272,6 +214,11 @@ struct mm_struct *mm_for_maps(struct tas
return mm;
}
+struct mm_struct *mm_for_maps(struct task_struct *task)
+{
+ return mm_access(task, PTRACE_MODE_READ);
+}
+
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
int res = 0;
@@ -816,38 +763,39 @@ static const struct file_operations proc
static int mem_open(struct inode* inode, struct file* file)
{
- file->private_data = (void*)((long)current->self_exec_id);
+ struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ struct mm_struct *mm;
+
+ if (!task)
+ return -ESRCH;
+
+ mm = mm_access(task, PTRACE_MODE_ATTACH);
+ put_task_struct(task);
+
+ if (IS_ERR(mm))
+ return PTR_ERR(mm);
+
/* OK to pass negative loff_t, we can catch out-of-range */
file->f_mode |= FMODE_UNSIGNED_OFFSET;
+ file->private_data = mm;
+
return 0;
}
static ssize_t mem_read(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
- struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ int ret;
char *page;
unsigned long src = *ppos;
- int ret = -ESRCH;
- struct mm_struct *mm;
+ struct mm_struct *mm = file->private_data;
- if (!task)
- goto out_no_task;
+ if (!mm)
+ return 0;
- ret = -ENOMEM;
page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
- goto out;
-
- mm = check_mem_permission(task);
- ret = PTR_ERR(mm);
- if (IS_ERR(mm))
- goto out_free;
-
- ret = -EIO;
-
- if (file->private_data != (void*)((long)current->self_exec_id))
- goto out_put;
+ return -ENOMEM;
ret = 0;
@@ -874,13 +822,7 @@ static ssize_t mem_read(struct file * fi
}
*ppos = src;
-out_put:
- mmput(mm);
-out_free:
free_page((unsigned long) page);
-out:
- put_task_struct(task);
-out_no_task:
return ret;
}
@@ -889,27 +831,15 @@ static ssize_t mem_write(struct file * f
{
int copied;
char *page;
- struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
unsigned long dst = *ppos;
- struct mm_struct *mm;
+ struct mm_struct *mm = file->private_data;
- copied = -ESRCH;
- if (!task)
- goto out_no_task;
+ if (!mm)
+ return 0;
- copied = -ENOMEM;
page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
- goto out_task;
-
- mm = check_mem_permission(task);
- copied = PTR_ERR(mm);
- if (IS_ERR(mm))
- goto out_free;
-
- copied = -EIO;
- if (file->private_data != (void *)((long)current->self_exec_id))
- goto out_mm;
+ return -ENOMEM;
copied = 0;
while (count > 0) {
@@ -933,13 +863,7 @@ static ssize_t mem_write(struct file * f
}
*ppos = dst;
-out_mm:
- mmput(mm);
-out_free:
free_page((unsigned long) page);
-out_task:
- put_task_struct(task);
-out_no_task:
return copied;
}
@@ -959,11 +883,20 @@ loff_t mem_lseek(struct file *file, loff
return file->f_pos;
}
+static int mem_release(struct inode *inode, struct file *file)
+{
+ struct mm_struct *mm = file->private_data;
+
+ mmput(mm);
+ return 0;
+}
+
static const struct file_operations proc_mem_operations = {
.llseek = mem_lseek,
.read = mem_read,
.write = mem_write,
.open = mem_open,
+ .release = mem_release,
};
static ssize_t environ_read(struct file *file, char __user *buf,
|