task_mmu.c 12.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
M
Mauricio Lin 已提交
5
#include <linux/highmem.h>
6 7
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
M
Mauricio Lin 已提交
8

L
Linus Torvalds 已提交
9 10
#include <asm/elf.h>
#include <asm/uaccess.h>
M
Mauricio Lin 已提交
11
#include <asm/tlbflush.h>
L
Linus Torvalds 已提交
12 13 14 15 16
#include "internal.h"

char *task_mem(struct mm_struct *mm, char *buffer)
{
	unsigned long data, text, lib;
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;

	/*
	 * Note: to minimize their overhead, mm maintains hiwater_vm and
	 * hiwater_rss only when about to *lower* total_vm or rss.  Any
	 * collector of these hiwater stats must therefore get total_vm
	 * and rss too, which will usually be the higher.  Barriers? not
	 * worth the effort, such snapshots can always be inconsistent.
	 */
	hiwater_vm = total_vm = mm->total_vm;
	if (hiwater_vm < mm->hiwater_vm)
		hiwater_vm = mm->hiwater_vm;
	hiwater_rss = total_rss = get_mm_rss(mm);
	if (hiwater_rss < mm->hiwater_rss)
		hiwater_rss = mm->hiwater_rss;
L
Linus Torvalds 已提交
32 33 34 35 36

	data = mm->total_vm - mm->shared_vm - mm->stack_vm;
	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
	buffer += sprintf(buffer,
37
		"VmPeak:\t%8lu kB\n"
L
Linus Torvalds 已提交
38 39
		"VmSize:\t%8lu kB\n"
		"VmLck:\t%8lu kB\n"
40
		"VmHWM:\t%8lu kB\n"
L
Linus Torvalds 已提交
41 42 43 44 45 46
		"VmRSS:\t%8lu kB\n"
		"VmData:\t%8lu kB\n"
		"VmStk:\t%8lu kB\n"
		"VmExe:\t%8lu kB\n"
		"VmLib:\t%8lu kB\n"
		"VmPTE:\t%8lu kB\n",
47 48
		hiwater_vm << (PAGE_SHIFT-10),
		(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
L
Linus Torvalds 已提交
49
		mm->locked_vm << (PAGE_SHIFT-10),
50 51
		hiwater_rss << (PAGE_SHIFT-10),
		total_rss << (PAGE_SHIFT-10),
L
Linus Torvalds 已提交
52 53 54 55 56 57 58 59 60 61 62 63 64 65
		data << (PAGE_SHIFT-10),
		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10);
	return buffer;
}

/*
 * task_vsize - size of the address space of @mm in bytes
 * (mm->total_vm pages scaled by the page size).
 */
unsigned long task_vsize(struct mm_struct *mm)
{
	unsigned long nr_pages = mm->total_vm;

	return nr_pages * PAGE_SIZE;
}

int task_statm(struct mm_struct *mm, int *shared, int *text,
	       int *data, int *resident)
{
66
	*shared = get_mm_counter(mm, file_rss);
L
Linus Torvalds 已提交
67 68 69
	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
								>> PAGE_SHIFT;
	*data = mm->total_vm - mm->shared_vm;
70
	*resident = *shared + get_mm_counter(mm, anon_rss);
L
Linus Torvalds 已提交
71 72 73 74 75 76 77
	return mm->total_vm;
}

int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
{
	struct vm_area_struct * vma;
	int result = -ENOENT;
78 79
	struct task_struct *task = get_proc_task(inode);
	struct mm_struct * mm = NULL;
L
Linus Torvalds 已提交
80

81 82 83 84
	if (task) {
		mm = get_task_mm(task);
		put_task_struct(task);
	}
L
Linus Torvalds 已提交
85 86 87 88 89 90 91 92 93 94 95 96
	if (!mm)
		goto out;
	down_read(&mm->mmap_sem);

	vma = mm->mmap;
	while (vma) {
		if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file)
			break;
		vma = vma->vm_next;
	}

	if (vma) {
97 98
		*mnt = mntget(vma->vm_file->f_path.mnt);
		*dentry = dget(vma->vm_file->f_path.dentry);
L
Linus Torvalds 已提交
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
		result = 0;
	}

	up_read(&mm->mmap_sem);
	mmput(mm);
out:
	return result;
}

/*
 * pad_len_spaces - emit blanks so that what follows starts at a fixed column.
 * @len: number of characters already printed on the current line
 *
 * The target column is 25 + 6 * sizeof(void *); at least one blank is
 * always written, even when @len is already past that column.
 */
static void pad_len_spaces(struct seq_file *m, int len)
{
	int pad = 25 + sizeof(void*) * 6 - len;

	seq_printf(m, "%*c", pad < 1 ? 1 : pad, ' ');
}

M
Mauricio Lin 已提交
116 117 118 119 120 121 122
/*
 * Per-VMA totals gathered by smaps_pte_range() and printed by
 * show_map_internal().  Every field is a byte count that grows in
 * PAGE_SIZE steps; the printing side converts to kB.
 */
struct mem_size_stats {
	unsigned long resident;		/* every present pte */
	unsigned long shared_clean;	/* mapcount >= 2, pte not dirty */
	unsigned long shared_dirty;	/* mapcount >= 2, pte dirty */
	unsigned long private_clean;	/* mapcount < 2, pte not dirty */
	unsigned long private_dirty;	/* mapcount < 2, pte dirty */
	unsigned long referenced;	/* pte young or page referenced */
};

126 127 128 129 130 131 132
/*
 * Context handed down through walk_pud_range()/walk_pmd_range() while the
 * page tables of one VMA are being walked.
 */
struct pmd_walker {
	struct vm_area_struct *vma;	/* VMA being walked */
	void *private;			/* opaque cookie forwarded to ->action */
	/*
	 * Called by walk_pmd_range() as
	 * action(vma, pmd, addr, next, private) for every pmd entry that
	 * pmd_none_or_clear_bad() does not reject.
	 */
	void (*action)(struct vm_area_struct *, pmd_t *, unsigned long,
		       unsigned long, void *);
};

M
Mauricio Lin 已提交
133
static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
L
Linus Torvalds 已提交
134
{
135 136
	struct proc_maps_private *priv = m->private;
	struct task_struct *task = priv->task;
M
Mauricio Lin 已提交
137 138 139 140
	struct vm_area_struct *vma = v;
	struct mm_struct *mm = vma->vm_mm;
	struct file *file = vma->vm_file;
	int flags = vma->vm_flags;
L
Linus Torvalds 已提交
141 142 143 144 145
	unsigned long ino = 0;
	dev_t dev = 0;
	int len;

	if (file) {
146
		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
L
Linus Torvalds 已提交
147 148 149 150 151
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
	}

	seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
M
Mauricio Lin 已提交
152 153
			vma->vm_start,
			vma->vm_end,
L
Linus Torvalds 已提交
154 155 156 157
			flags & VM_READ ? 'r' : '-',
			flags & VM_WRITE ? 'w' : '-',
			flags & VM_EXEC ? 'x' : '-',
			flags & VM_MAYSHARE ? 's' : 'p',
M
Mauricio Lin 已提交
158
			vma->vm_pgoff << PAGE_SHIFT,
L
Linus Torvalds 已提交
159 160 161 162 163 164
			MAJOR(dev), MINOR(dev), ino, &len);

	/*
	 * Print the dentry name for named mappings, and a
	 * special [heap] marker for the heap:
	 */
M
Mauricio Lin 已提交
165
	if (file) {
L
Linus Torvalds 已提交
166
		pad_len_spaces(m, len);
167
		seq_path(m, file->f_path.mnt, file->f_path.dentry, "\n");
L
Linus Torvalds 已提交
168
	} else {
169 170 171 172
		const char *name = arch_vma_name(vma);
		if (!name) {
			if (mm) {
				if (vma->vm_start <= mm->start_brk &&
M
Mauricio Lin 已提交
173
						vma->vm_end >= mm->brk) {
174 175 176 177
					name = "[heap]";
				} else if (vma->vm_start <= mm->start_stack &&
					   vma->vm_end >= mm->start_stack) {
					name = "[stack]";
L
Linus Torvalds 已提交
178
				}
179 180
			} else {
				name = "[vdso]";
L
Linus Torvalds 已提交
181
			}
182 183
		}
		if (name) {
L
Linus Torvalds 已提交
184
			pad_len_spaces(m, len);
185
			seq_puts(m, name);
L
Linus Torvalds 已提交
186 187 188
		}
	}
	seq_putc(m, '\n');
M
Mauricio Lin 已提交
189 190 191

	if (mss)
		seq_printf(m,
192 193 194 195 196 197
			   "Size:           %8lu kB\n"
			   "Rss:            %8lu kB\n"
			   "Shared_Clean:   %8lu kB\n"
			   "Shared_Dirty:   %8lu kB\n"
			   "Private_Clean:  %8lu kB\n"
			   "Private_Dirty:  %8lu kB\n"
198
			   "Referenced:     %8lu kB\n",
M
Mauricio Lin 已提交
199 200 201 202 203
			   (vma->vm_end - vma->vm_start) >> 10,
			   mss->resident >> 10,
			   mss->shared_clean  >> 10,
			   mss->shared_dirty  >> 10,
			   mss->private_clean >> 10,
204 205
			   mss->private_dirty >> 10,
			   mss->referenced >> 10);
M
Mauricio Lin 已提交
206 207 208

	if (m->count < m->size)  /* vma is copied successfully */
		m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
L
Linus Torvalds 已提交
209 210 211
	return 0;
}

M
Mauricio Lin 已提交
212 213
static int show_map(struct seq_file *m, void *v)
{
214
	return show_map_internal(m, v, NULL);
M
Mauricio Lin 已提交
215 216
}

217 218 219
/*
 * smaps_pte_range - pmd_walker action that accumulates smaps statistics.
 * @vma:     VMA the range belongs to
 * @pmd:     pmd entry covering [@addr, @end)
 * @addr:    first address of the range
 * @end:     address just past the range
 * @private: the struct mem_size_stats to update
 *
 * Every present pte adds PAGE_SIZE to ->resident.  When the pte also maps
 * a normal page, the page is counted as referenced (young pte or
 * PageReferenced) and sorted into shared (mapcount >= 2) or private, and
 * dirty or clean according to the pte.  The ptes are read under the pte
 * lock; the function may reschedule after dropping it.
 */
static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
			    unsigned long addr, unsigned long end,
			    void *private)
{
	struct mem_size_stats *stats = private;
	unsigned long *bucket;
	struct page *pg;
	spinlock_t *ptl;
	pte_t *ptep, entry;

	ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; ptep++, addr += PAGE_SIZE) {
		entry = *ptep;
		if (!pte_present(entry))
			continue;

		stats->resident += PAGE_SIZE;

		pg = vm_normal_page(vma, addr, entry);
		if (!pg)
			continue;

		/* Accumulate the size in pages that have been accessed. */
		if (pte_young(entry) || PageReferenced(pg))
			stats->referenced += PAGE_SIZE;

		/* Pick the one counter this page belongs to. */
		if (page_mapcount(pg) >= 2)
			bucket = pte_dirty(entry) ? &stats->shared_dirty
						  : &stats->shared_clean;
		else
			bucket = pte_dirty(entry) ? &stats->private_dirty
						  : &stats->private_clean;
		*bucket += PAGE_SIZE;
	}
	/* ptep has stepped one past the last entry that was mapped. */
	pte_unmap_unlock(ptep - 1, ptl);
	cond_resched();
}

257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
/*
 * clear_refs_pte_range - pmd_walker action that resets reference state.
 *
 * For each present pte in [@addr, @end) that maps a normal page, the pte's
 * young bit is cleared and the page's Referenced flag is dropped.
 * @private is not used.  Runs under the pte lock and may reschedule after
 * releasing it.
 */
static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				 unsigned long addr, unsigned long end,
				 void *private)
{
	spinlock_t *ptl;
	pte_t *ptep;

	ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	for (; addr != end; ptep++, addr += PAGE_SIZE) {
		pte_t entry = *ptep;
		struct page *pg;

		if (pte_present(entry)) {
			pg = vm_normal_page(vma, addr, entry);
			if (pg) {
				/* Clear accessed and referenced bits. */
				ptep_test_and_clear_young(vma, addr, ptep);
				ClearPageReferenced(pg);
			}
		}
	}
	/* ptep has stepped one past the last entry that was mapped. */
	pte_unmap_unlock(ptep - 1, ptl);
	cond_resched();
}

/*
 * walk_pmd_range - run walker->action on every usable pmd entry below @pud
 * that covers part of [@addr, @end).  Entries rejected by
 * pmd_none_or_clear_bad() are skipped.
 */
static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud,
				  unsigned long addr, unsigned long end)
{
	pmd_t *pmd = pmd_offset(pud, addr);

	while (addr != end) {
		unsigned long next = pmd_addr_end(addr, end);

		if (!pmd_none_or_clear_bad(pmd))
			walker->action(walker->vma, pmd, addr, next,
				       walker->private);
		pmd++;
		addr = next;
	}
}

298 299
/*
 * walk_pud_range - descend into every usable pud entry below @pgd that
 * covers part of [@addr, @end).  Entries rejected by
 * pud_none_or_clear_bad() are skipped.
 */
static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd,
				  unsigned long addr, unsigned long end)
{
	pud_t *pud = pud_offset(pgd, addr);

	while (addr != end) {
		unsigned long next = pud_addr_end(addr, end);

		if (!pud_none_or_clear_bad(pud))
			walk_pmd_range(walker, pud, addr, next);
		pud++;
		addr = next;
	}
}

313 314 315 316 317 318 319 320 321 322 323 324 325 326
/*
 * walk_page_range - walk the page tables of a VMA with a callback
 * @vma - VMA to walk
 * @action - callback invoked for every bottom-level (PTE) page table
 * @private - private data passed to the callback function
 *
 * Covers [vma->vm_start, vma->vm_end), descending pgd -> pud -> pmd and
 * calling @action once per pmd entry that is populated.  Entries rejected
 * by the *_none_or_clear_bad() checks are skipped at every level.
 */
static inline void walk_page_range(struct vm_area_struct *vma,
				   void (*action)(struct vm_area_struct *,
						  pmd_t *, unsigned long,
						  unsigned long, void *),
				   void *private)
{
	struct pmd_walker walker = {
		.vma		= vma,
		.private	= private,
		.action		= action,
	};
	unsigned long addr = vma->vm_start;
	unsigned long end = vma->vm_end;
	pgd_t *pgd = pgd_offset(vma->vm_mm, addr);

	while (addr != end) {
		unsigned long next = pgd_addr_end(addr, end);

		if (!pgd_none_or_clear_bad(pgd))
			walk_pud_range(&walker, pgd, addr, next);
		pgd++;
		addr = next;
	}
}

static int show_smap(struct seq_file *m, void *v)
{
	struct vm_area_struct *vma = v;
	struct mem_size_stats mss;

	memset(&mss, 0, sizeof mss);
N
Nick Piggin 已提交
353
	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
354
		walk_page_range(vma, smaps_pte_range, &mss);
M
Mauricio Lin 已提交
355 356 357
	return show_map_internal(m, v, &mss);
}

358 359 360 361 362 363 364 365 366 367 368 369
/*
 * clear_refs_smap - clear the young/referenced state of every page mapped
 * by @mm.
 *
 * All VMAs are visited with mmap_sem held for reading; VMAs without an mm
 * and hugetlb VMAs are skipped.  The TLB of @mm is flushed once after the
 * walk, before the semaphore is released.
 */
void clear_refs_smap(struct mm_struct *mm)
{
	struct vm_area_struct *area;

	down_read(&mm->mmap_sem);

	for (area = mm->mmap; area; area = area->vm_next) {
		if (!area->vm_mm || is_vm_hugetlb_page(area))
			continue;
		walk_page_range(area, clear_refs_pte_range, NULL);
	}

	flush_tlb_mm(mm);
	up_read(&mm->mmap_sem);
}

L
Linus Torvalds 已提交
370 371
/*
 * m_start - seq_file ->start: find the VMA at which to (re)start output.
 *
 * Takes a reference on the task (kept in priv->task and dropped by
 * m_stop()) and on its mm.  When a real VMA is returned, mmap_sem is held
 * for reading and the mm reference is kept; both are released later by
 * vma_stop().  When the gate VMA or NULL is returned after the mm was
 * obtained, the semaphore and the mm reference have already been dropped
 * here.
 */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_maps_private *priv = m->private;
	unsigned long last_addr = m->version;
	struct vm_area_struct *vma, *tail_vma = NULL;
	struct mm_struct *mm;
	loff_t l = *pos;

	/* Clear the per syscall fields in priv */
	priv->task = NULL;
	priv->tail_vma = NULL;

	/*
	 * last_addr (rather than the next address) is remembered so that
	 * the lookup hits mmap_cache most of the time.  It is zero at the
	 * beginning and after lseek, and -1 once the end of the vmas has
	 * been passed.
	 */
	if (last_addr == -1UL)
		return NULL;

	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
	if (!priv->task)
		return NULL;

	mm = get_task_mm(priv->task);
	if (!mm)
		return NULL;

	tail_vma = get_gate_vma(priv->task);
	priv->tail_vma = tail_vma;
	down_read(&mm->mmap_sem);

	/* Start with last addr hint */
	vma = last_addr ? find_vma(mm, last_addr) : NULL;
	if (vma) {
		vma = vma->vm_next;
	} else if ((unsigned long)l < mm->map_count) {
		/*
		 * The index is within range: scan sequentially from the
		 * head of the list until it is reached.
		 */
		for (vma = mm->mmap; l-- && vma; vma = vma->vm_next)
			;
	} else if (l != mm->map_count) {
		tail_vma = NULL; /* After gate vma */
	}

	if (vma)
		return vma;

	/* End of vmas has been reached */
	m->version = tail_vma ? 0 : -1UL;
	up_read(&mm->mmap_sem);
	mmput(mm);
	return tail_vma;
}

435
static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
436
{
437
	if (vma && vma != priv->tail_vma) {
M
Mauricio Lin 已提交
438
		struct mm_struct *mm = vma->vm_mm;
L
Linus Torvalds 已提交
439 440 441 442 443 444 445
		up_read(&mm->mmap_sem);
		mmput(mm);
	}
}

/*
 * m_next - seq_file ->next: advance *@pos and return the following VMA.
 *
 * After the last VMA on the list, the resources are released through
 * vma_stop() and priv->tail_vma is returned as the final entry; after
 * priv->tail_vma itself, NULL ends the iteration.
 */
static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_maps_private *priv = m->private;
	struct vm_area_struct *cur = v;
	struct vm_area_struct *tail = priv->tail_vma;

	++*pos;

	if (cur && cur != tail && cur->vm_next)
		return cur->vm_next;

	vma_stop(priv, cur);
	if (cur == tail)
		return NULL;
	return tail;
}

457 458 459 460 461 462 463 464 465 466
/*
 * m_stop - seq_file ->stop: release whatever vma_stop() handles for the
 * current VMA, then drop the task reference stored in priv->task, if any.
 */
static void m_stop(struct seq_file *m, void *v)
{
	struct proc_maps_private *priv = m->private;
	struct task_struct *tsk;

	vma_stop(priv, v);

	tsk = priv->task;
	if (tsk)
		put_task_struct(tsk);
}

467
static struct seq_operations proc_pid_maps_op = {
L
Linus Torvalds 已提交
468 469 470 471 472
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_map
};
473

474
static struct seq_operations proc_pid_smaps_op = {
M
Mauricio Lin 已提交
475 476 477 478 479 480
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_smap
};

481 482 483
static int do_maps_open(struct inode *inode, struct file *file,
			struct seq_operations *ops)
{
484 485 486 487
	struct proc_maps_private *priv;
	int ret = -ENOMEM;
	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (priv) {
488
		priv->pid = proc_pid(inode);
489 490 491 492 493 494 495
		ret = seq_open(file, ops);
		if (!ret) {
			struct seq_file *m = file->private_data;
			m->private = priv;
		} else {
			kfree(priv);
		}
496 497 498 499 500 501 502 503 504
	}
	return ret;
}

/* ->open for the maps file: open a seq_file driven by proc_pid_maps_op. */
static int maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_maps_op);
}

505
const struct file_operations proc_maps_operations = {
506 507 508
	.open		= maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
509
	.release	= seq_release_private,
510 511
};

512
#ifdef CONFIG_NUMA
513
extern int show_numa_map(struct seq_file *m, void *v);
514

515
static struct seq_operations proc_pid_numa_maps_op = {
516 517 518 519
        .start  = m_start,
        .next   = m_next,
        .stop   = m_stop,
        .show   = show_numa_map
520
};
521 522 523 524 525 526

/* ->open for the numa_maps file: open a seq_file driven by proc_pid_numa_maps_op. */
static int numa_maps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_numa_maps_op);
}

527
const struct file_operations proc_numa_maps_operations = {
528 529 530
	.open		= numa_maps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
531
	.release	= seq_release_private,
532
};
533
#endif
534 535 536 537 538 539

/* ->open for the smaps file: open a seq_file driven by proc_pid_smaps_op. */
static int smaps_open(struct inode *inode, struct file *file)
{
	return do_maps_open(inode, file, &proc_pid_smaps_op);
}

540
const struct file_operations proc_smaps_operations = {
541 542 543
	.open		= smaps_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
544
	.release	= seq_release_private,
545
};