mem.c 19.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 *  linux/drivers/char/mem.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Added devfs support.
 *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
 *  Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
 */

#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mman.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/raw.h>
#include <linux/tty.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/device.h>
23 24
#include <linux/highmem.h>
#include <linux/crash_dump.h>
L
Linus Torvalds 已提交
25
#include <linux/backing-dev.h>
26
#include <linux/bootmem.h>
27
#include <linux/splice.h>
28
#include <linux/pfn.h>
29
#include <linux/export.h>
L
Linus Torvalds 已提交
30 31 32 33 34 35 36 37

#include <asm/uaccess.h>
#include <asm/io.h>

#ifdef CONFIG_IA64
# include <linux/efi.h>
#endif

38 39 40 41 42
static inline unsigned long size_inside_page(unsigned long start,
					     unsigned long size)
{
	unsigned long sz;

43
	sz = PAGE_SIZE - (start & (PAGE_SIZE - 1));
44

45
	return min(sz, size);
46 47
}

L
Linus Torvalds 已提交
48
#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
49
/*
 * Report whether the physical range [addr, addr + count) ends at or below
 * __pa(high_memory).  Returns 1 if it does, 0 otherwise.
 *
 * The bound is tested without forming addr + count: for large values that
 * sum wraps around and would then compare as a small, "valid" address.
 */
static inline int valid_phys_addr_range(unsigned long addr, size_t count)
{
	unsigned long limit = __pa(high_memory);

	return addr <= limit && count <= limit - addr;
}
53

54
/*
 * Default mmap range check, used when ARCH_HAS_VALID_PHYS_ADDR_RANGE is not
 * defined: every (pfn, size) pair is accepted.
 */
static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
	return 1;
}
L
Linus Torvalds 已提交
58 59
#endif

60
#ifdef CONFIG_STRICT_DEVMEM
61
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
62
{
63 64 65 66 67 68 69 70
	u64 from = ((u64)pfn) << PAGE_SHIFT;
	u64 to = from + size;
	u64 cursor = from;

	while (cursor < to) {
		if (!devmem_is_allowed(pfn)) {
			printk(KERN_INFO
		"Program %s tried to access /dev/mem between %Lx->%Lx.\n",
71 72 73
				current->comm, from, to);
			return 0;
		}
74 75
		cursor += PAGE_SIZE;
		pfn++;
76 77 78 79
	}
	return 1;
}
#else
80
/*
 * Variant built without CONFIG_STRICT_DEVMEM: no per-page filtering is done,
 * every range is reported as allowed.
 */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	return 1;
}
#endif

A
Andrew Morton 已提交
86
/*
 * Weak default that does nothing.  read_mem() and write_mem() call it once
 * they are done with a pointer obtained from xlate_dev_mem_ptr().
 */
void __weak unxlate_dev_mem_ptr(unsigned long phys, void *addr)
{
}

L
Linus Torvalds 已提交
90
/*
 * This function reads the *physical* memory. The f_pos points directly to the
 * memory location.
 */
A
Andrew Morton 已提交
94
/*
 * read() handler for /dev/mem: copy @count bytes of physical memory,
 * starting at physical address *@ppos, to the user buffer @buf.
 *
 * Returns the number of bytes copied and advances *@ppos by that amount, or:
 *   0       if *@ppos does not fit in an unsigned long,
 *   -EFAULT if the range fails valid_phys_addr_range(), a page cannot be
 *           translated, or the copy to user space faults,
 *   -EPERM  if range_is_allowed() refuses the range.
 */
static ssize_t read_mem(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t read, sz;
	char *ptr;

	/*
	 * loff_t may be wider than unsigned long; refuse an offset that was
	 * truncated by the assignment above instead of reading from an
	 * unrelated low address.
	 */
	if (p != *ppos)
		return 0;

	if (!valid_phys_addr_range(p, count))
		return -EFAULT;
	read = 0;
#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		if (sz > 0) {
			if (clear_user(buf, sz))
				return -EFAULT;
			buf += sz;
			p += sz;
			count -= sz;
			read += sz;
		}
	}
#endif

	/*
	 * Validate the whole remaining range once, before copying anything.
	 * Re-checking [p, p + count) on every page, as was done inside the
	 * loop, only repeats checks on sub-ranges of this one.
	 */
	if (count > 0 && !range_is_allowed(p >> PAGE_SHIFT, count))
		return -EPERM;

	while (count > 0) {
		unsigned long remaining;

		/* never let a single copy cross a page boundary */
		sz = size_inside_page(p, count);

		/*
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
		 */
		ptr = xlate_dev_mem_ptr(p);
		if (!ptr)
			return -EFAULT;

		remaining = copy_to_user(buf, ptr, sz);
		unxlate_dev_mem_ptr(p, ptr);
		if (remaining)
			return -EFAULT;

		buf += sz;
		p += sz;
		count -= sz;
		read += sz;
	}

	*ppos += read;
	return read;
}

A
Andrew Morton 已提交
151
/*
 * write() handler for /dev/mem: copy @count bytes from the user buffer @buf
 * into physical memory starting at physical address *@ppos.
 *
 * Returns the number of bytes written and advances *@ppos by that amount.
 * A translation failure or a faulting user copy ends the transfer early; the
 * partial count is returned if anything was written, otherwise -EFAULT.
 * Other errors:
 *   -EFBIG  if *@ppos does not fit in an unsigned long,
 *   -EFAULT if the range fails valid_phys_addr_range(),
 *   -EPERM  if range_is_allowed() refuses a page.
 */
static ssize_t write_mem(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t written, sz;
	unsigned long copied;
	void *ptr;

	/*
	 * loff_t may be wider than unsigned long; refuse an offset that was
	 * truncated by the assignment above instead of writing to an
	 * unrelated low address.
	 */
	if (p != *ppos)
		return -EFBIG;

	if (!valid_phys_addr_range(p, count))
		return -EFAULT;

	written = 0;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		/* Hmm. Do something? */
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}
#endif

	while (count > 0) {
		/* never let a single copy cross a page boundary */
		sz = size_inside_page(p, count);

		if (!range_is_allowed(p >> PAGE_SHIFT, sz))
			return -EPERM;

		/*
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
		 */
		ptr = xlate_dev_mem_ptr(p);
		if (!ptr) {
			if (written)
				break;
			return -EFAULT;
		}

		/* copy_from_user() returns the number of bytes NOT copied */
		copied = copy_from_user(ptr, buf, sz);
		unxlate_dev_mem_ptr(p, ptr);
		if (copied) {
			written += sz - copied;
			if (written)
				break;
			return -EFAULT;
		}

		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}

	*ppos += written;
	return written;
}

A
Andrew Morton 已提交
213
/*
 * Weak default consulted by mmap_mem() before it picks the page protection:
 * always answers 1 and leaves *@vma_prot untouched.
 */
int __weak phys_mem_access_prot_allowed(struct file *file,
	unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
{
	return 1;
}

219
#ifndef __HAVE_PHYS_MEM_ACCESS_PROT
A
Andrew Morton 已提交
220 221 222 223 224 225

/*
 * Architectures vary in how they handle caching for addresses
 * outside of main memory.
 *
 */
226
#ifdef pgprot_noncached
A
Andrew Morton 已提交
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
/*
 * Decide whether physical address @addr, accessed through @file, should be
 * mapped non-cached.  Returns non-zero for "uncached".
 */
static int uncached_access(struct file *file, unsigned long addr)
{
#if defined(CONFIG_IA64)
	/*
	 * On ia64, we ignore O_DSYNC because we cannot tolerate memory
	 * attribute aliases.
	 */
	return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
#elif defined(CONFIG_MIPS)
	{
		/* MIPS delegates the decision to __uncached_access() */
		extern int __uncached_access(struct file *file,
					     unsigned long addr);

		return __uncached_access(file, addr);
	}
#else
	/*
	 * Accessing memory above the top the kernel knows about or through a
	 * file pointer
	 * that was marked O_DSYNC will be done non-cached.
	 */
	return (file->f_flags & O_DSYNC) || addr >= __pa(high_memory);
#endif
}
253
#endif
A
Andrew Morton 已提交
254

255 256 257 258 259 260 261 262 263 264 265 266 267
/*
 * Default page-protection chooser (built when __HAVE_PHYS_MEM_ACCESS_PROT is
 * not defined).  When pgprot_noncached exists and uncached_access() says the
 * address of @pfn must be uncached, the non-cached form of @vma_prot is
 * returned; otherwise @vma_prot is returned unchanged.
 */
static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
				     unsigned long size, pgprot_t vma_prot)
{
#ifdef pgprot_noncached
	if (uncached_access(file, pfn << PAGE_SHIFT))
		return pgprot_noncached(vma_prot);
#endif
	return vma_prot;
}
#endif

268 269 270 271 272 273 274 275 276
#ifndef CONFIG_MMU
static unsigned long get_unmapped_area_mem(struct file *file,
					   unsigned long addr,
					   unsigned long len,
					   unsigned long pgoff,
					   unsigned long flags)
{
	if (!valid_mmap_phys_addr_range(pgoff, len))
		return (unsigned long) -EINVAL;
277
	return pgoff << PAGE_SHIFT;
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
}

/* can't do an in-place private mapping if there's no MMU */
/*
 * !CONFIG_MMU variant: the mapping is acceptable only if VM_MAYSHARE is set
 * in the vma flags.  Returns the masked flag value (non-zero means OK).
 */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return vma->vm_flags & VM_MAYSHARE;
}
#else
#define get_unmapped_area_mem	NULL

/* CONFIG_MMU variant: every mapping is acceptable, so always return 1. */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return 1;
}
#endif

294
/*
 * vm_ops installed by mmap_mem().  The only hook, .access, is provided when
 * CONFIG_HAVE_IOREMAP_PROT is set; otherwise the table is empty.
 */
static const struct vm_operations_struct mmap_mem_ops = {
#ifdef CONFIG_HAVE_IOREMAP_PROT
	.access = generic_access_phys,
#endif
};

A
Andrew Morton 已提交
300
static int mmap_mem(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
301
{
302 303
	size_t size = vma->vm_end - vma->vm_start;

304
	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
305 306
		return -EINVAL;

307 308 309
	if (!private_mapping_ok(vma))
		return -ENOSYS;

310 311 312
	if (!range_is_allowed(vma->vm_pgoff, size))
		return -EPERM;

313 314 315 316
	if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
						&vma->vm_page_prot))
		return -EINVAL;

317
	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
318
						 size,
L
Linus Torvalds 已提交
319 320
						 vma->vm_page_prot);

321 322
	vma->vm_ops = &mmap_mem_ops;

L
Linus Torvalds 已提交
323 324 325 326
	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
	if (remap_pfn_range(vma,
			    vma->vm_start,
			    vma->vm_pgoff,
327
			    size,
328
			    vma->vm_page_prot)) {
L
Linus Torvalds 已提交
329
		return -EAGAIN;
330
	}
L
Linus Torvalds 已提交
331 332 333
	return 0;
}

334
#ifdef CONFIG_DEVKMEM
A
Andrew Morton 已提交
335
static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
336
{
L
Linus Torvalds 已提交
337 338
	unsigned long pfn;

339 340
	/* Turn a kernel-virtual address into a physical page frame */
	pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
L
Linus Torvalds 已提交
341

L
Linus Torvalds 已提交
342
	/*
A
Andrew Morton 已提交
343 344 345
	 * RED-PEN: on some architectures there is more mapped memory than
	 * available in mem_map which pfn_valid checks for. Perhaps should add a
	 * new macro here.
L
Linus Torvalds 已提交
346 347 348
	 *
	 * RED-PEN: vmalloc is not supported right now.
	 */
L
Linus Torvalds 已提交
349
	if (!pfn_valid(pfn))
L
Linus Torvalds 已提交
350
		return -EIO;
L
Linus Torvalds 已提交
351 352

	vma->vm_pgoff = pfn;
L
Linus Torvalds 已提交
353 354
	return mmap_mem(file, vma);
}
355
#endif
L
Linus Torvalds 已提交
356

357 358 359 360
#ifdef CONFIG_CRASH_DUMP
/*
 * Read memory corresponding to the old kernel.
 */
361
/*
 * read() handler for /dev/oldmem: copy up to @count bytes, starting at
 * offset *@ppos, one page-bounded piece at a time through
 * copy_oldmem_page().
 *
 * *@ppos is advanced as data is copied.  Reading stops once the page frame
 * exceeds saved_max_pfn; the byte count so far is returned in that case.  A
 * negative result from copy_oldmem_page() is returned as is.
 */
static ssize_t read_oldmem(struct file *file, char __user *buf,
				size_t count, loff_t *ppos)
{
	size_t done = 0;

	while (count) {
		unsigned long pfn = *ppos / PAGE_SIZE;
		unsigned long offset;
		size_t csize;
		int rc;

		if (pfn > saved_max_pfn)
			return done;

		/* limit this piece to the end of the current page */
		offset = (unsigned long)(*ppos % PAGE_SIZE);
		csize = (count > PAGE_SIZE - offset) ? PAGE_SIZE - offset
						     : count;

		rc = copy_oldmem_page(pfn, buf, csize, offset, 1);
		if (rc < 0)
			return rc;

		buf += csize;
		*ppos += csize;
		done += csize;
		count -= csize;
	}

	return done;
}
#endif
L
Linus Torvalds 已提交
390

391
#ifdef CONFIG_DEVKMEM
L
Linus Torvalds 已提交
392 393 394
/*
 * This function reads the *virtual* memory as seen by the kernel.
 */
A
Andrew Morton 已提交
395
/*
 * read() handler for /dev/kmem: copy @count bytes of kernel-virtual memory
 * starting at address *@ppos to @buf.
 *
 * Addresses below high_memory are copied directly (via
 * xlate_dev_kmem_ptr()); whatever remains is read with vread() through a
 * bounce page and must lie in the vmalloc/module area.
 *
 * Returns the number of bytes read, storing the final address in *@ppos.
 * A fault while copying the low part returns -EFAULT immediately, and a
 * failed bounce-page allocation returns -ENOMEM; an error in the second
 * phase (-ENXIO, -EFAULT) is returned only when nothing was read.
 */
static ssize_t read_kmem(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	const unsigned long low_end = (unsigned long)high_memory;
	unsigned long p = *ppos;
	ssize_t low_count, sz;
	ssize_t read = 0;
	char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
	int err = 0;

	if (p < low_end) {
		/* portion of the request that lies below high_memory */
		low_count = min_t(unsigned long, count, low_end - p);

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
		/* we don't have page 0 mapped on sparc and m68k.. */
		if (p < PAGE_SIZE && low_count > 0) {
			sz = size_inside_page(p, low_count);
			if (clear_user(buf, sz))
				return -EFAULT;
			buf += sz;
			p += sz;
			read += sz;
			low_count -= sz;
			count -= sz;
		}
#endif
		for (; low_count > 0; low_count -= sz, count -= sz) {
			sz = size_inside_page(p, low_count);

			/*
			 * On ia64 if a page has been mapped somewhere as
			 * uncached, then it must also be accessed uncached
			 * by the kernel or data corruption may occur
			 */
			kbuf = xlate_dev_kmem_ptr((char *)p);

			if (copy_to_user(buf, kbuf, sz))
				return -EFAULT;
			buf += sz;
			p += sz;
			read += sz;
		}
	}

	if (count > 0) {
		kbuf = (char *)__get_free_page(GFP_KERNEL);
		if (!kbuf)
			return -ENOMEM;

		while (count > 0) {
			sz = size_inside_page(p, count);
			if (!is_vmalloc_or_module_addr((void *)p)) {
				err = -ENXIO;
				break;
			}

			/* vread() reports how many bytes it delivered */
			sz = vread(kbuf, (char *)p, sz);
			if (!sz)
				break;

			if (copy_to_user(buf, kbuf, sz)) {
				err = -EFAULT;
				break;
			}
			count -= sz;
			buf += sz;
			read += sz;
			p += sz;
		}
		free_page((unsigned long)kbuf);
	}

	*ppos = p;
	if (read)
		return read;
	return err;
}


A
Andrew Morton 已提交
471 472
/*
 * Helper for write_kmem(): copy @count bytes from user buffer @buf to the
 * kernel-virtual address @p, one page-bounded piece at a time.
 *
 * Returns the number of bytes written and adds it to *@ppos.  If a user copy
 * faults, the partial count is returned when it is non-zero, otherwise
 * -EFAULT.
 */
static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
				size_t count, loff_t *ppos)
{
	ssize_t written = 0;
	ssize_t sz;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		/* Hmm. Do something? */
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}
#endif

	while (count > 0) {
		unsigned long uncopied;
		char *ptr;

		sz = size_inside_page(p, count);

		/*
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
		 */
		ptr = xlate_dev_kmem_ptr((char *)p);

		/* copy_from_user() returns the number of bytes NOT copied */
		uncopied = copy_from_user(ptr, buf, sz);
		if (uncopied) {
			written += sz - uncopied;
			if (!written)
				return -EFAULT;
			break;
		}

		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}

	*ppos += written;
	return written;
}

/*
 * This function writes to the *virtual* memory as seen by the kernel.
 */
A
Andrew Morton 已提交
522
/*
 * write() handler for /dev/kmem.  The part of the request below high_memory
 * goes through do_write_kmem(); the rest is copied into a bounce page and
 * stored with vwrite(), and must lie in the vmalloc/module area.
 *
 * Returns the total number of bytes written, or, when that total is zero,
 * the error recorded in the second phase (-ENXIO, -EFAULT) or 0.  A short or
 * failed do_write_kmem() result is returned directly.
 */
static ssize_t write_kmem(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	const unsigned long low_end = (unsigned long)high_memory;
	unsigned long p = *ppos;
	ssize_t wrote = 0;
	ssize_t virtr = 0;
	ssize_t total;
	char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
	int err = 0;

	if (p < low_end) {
		unsigned long to_write = min_t(unsigned long, count,
					       low_end - p);

		wrote = do_write_kmem(p, buf, to_write, ppos);
		if (wrote != to_write)
			return wrote;
		p += wrote;
		buf += wrote;
		count -= wrote;
	}

	if (count > 0) {
		kbuf = (char *)__get_free_page(GFP_KERNEL);
		if (!kbuf)
			return wrote ? wrote : -ENOMEM;

		while (count > 0) {
			unsigned long sz = size_inside_page(p, count);

			if (!is_vmalloc_or_module_addr((void *)p)) {
				err = -ENXIO;
				break;
			}
			/* any uncopied byte aborts the transfer */
			if (copy_from_user(kbuf, buf, sz)) {
				err = -EFAULT;
				break;
			}
			vwrite(kbuf, (char *)p, sz);
			count -= sz;
			buf += sz;
			virtr += sz;
			p += sz;
		}
		free_page((unsigned long)kbuf);
	}

	*ppos = p;
	total = virtr + wrote;
	return total ? total : err;
}
571
#endif
L
Linus Torvalds 已提交
572

573
#ifdef CONFIG_DEVPORT
A
Andrew Morton 已提交
574
/*
 * read() handler for /dev/port: read one byte with inb() from each
 * consecutive I/O port starting at *@ppos and store it in @buf.  Stops after
 * @count bytes or when the port number reaches 65536.
 *
 * Returns the number of bytes stored and leaves the next port number in
 * *@ppos; returns -EFAULT if @buf is not writable.
 */
static ssize_t read_port(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long port = *ppos;
	char __user *dst = buf;

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	for (; count > 0 && port < 65536; count--, port++, dst++) {
		if (__put_user(inb(port), dst) < 0)
			return -EFAULT;
	}

	*ppos = port;
	return dst - buf;
}

A
Andrew Morton 已提交
592
/*
 * write() handler for /dev/port: write each byte of @buf with outb() to
 * consecutive I/O ports starting at *@ppos.  Stops after @count bytes or
 * when the port number reaches 65536.
 *
 * Returns the number of bytes consumed and leaves the next port number in
 * *@ppos.  A fault on the very first byte (or an unreadable @buf) returns
 * -EFAULT; a later fault ends the transfer with a short count.
 */
static ssize_t write_port(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	unsigned long port = *ppos;
	const char __user *src = buf;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	for (; count > 0 && port < 65536; count--, port++, src++) {
		char c;

		if (__get_user(c, src)) {
			if (src > buf)
				break;
			return -EFAULT;
		}
		outb(c, port);
	}

	*ppos = port;
	return src - buf;
}
#endif

A
Andrew Morton 已提交
616
/* read() handler for /dev/null: always returns 0 without touching @buf. */
static ssize_t read_null(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	return 0;
}

A
Andrew Morton 已提交
622
/*
 * write() handler for /dev/null (and, via the write_zero alias, /dev/zero):
 * discards the data and reports all @count bytes as written.
 */
static ssize_t write_null(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	return count;
}

628 629 630 631 632 633
/*
 * Actor passed to splice_from_pipe() by splice_write_null(): reports the
 * whole descriptor length as handled without looking at the pipe buffer.
 */
static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	return sd->len;
}

A
Andrew Morton 已提交
634
static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out,
635 636 637 638 639
				 loff_t *ppos, size_t len, unsigned int flags)
{
	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
}

A
Andrew Morton 已提交
640
/*
 * read() handler for /dev/zero (and /dev/full via the read_full alias):
 * clear @count bytes of the user buffer, at most one page per step.
 *
 * Returns the number of bytes cleared.  If nothing could be cleared the
 * result is -EFAULT, or -ERESTARTSYS when a signal was pending.  A zero
 * @count returns 0 immediately.
 */
static ssize_t read_zero(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	size_t cleared = 0;

	if (!count)
		return 0;

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	while (count) {
		/* Just for latency reasons */
		size_t chunk = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		unsigned long left = __clear_user(buf, chunk);

		cleared += chunk - left;
		if (left)
			break;

		if (signal_pending(current))
			return cleared ? cleared : -ERESTARTSYS;

		buf += chunk;
		count -= chunk;
		cond_resched();
	}

	return cleared ? cleared : -EFAULT;
}

A
Andrew Morton 已提交
671
static int mmap_zero(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
672
{
N
Nick Piggin 已提交
673
#ifndef CONFIG_MMU
L
Linus Torvalds 已提交
674
	return -ENOSYS;
N
Nick Piggin 已提交
675 676 677 678
#endif
	if (vma->vm_flags & VM_SHARED)
		return shmem_zero_setup(vma);
	return 0;
L
Linus Torvalds 已提交
679 680
}

A
Andrew Morton 已提交
681
/* write() handler for /dev/full: every write fails with -ENOSPC. */
static ssize_t write_full(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	return -ENOSPC;
}

/*
 * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
 * can fopen() both devices with "a" now.  This was previously impossible.
 * -- SRB.
 */
A
Andrew Morton 已提交
692
/*
 * llseek handler shared by /dev/null, /dev/zero and /dev/full: ignores
 * @offset and @orig, resets the file position to 0 and returns 0.
 */
static loff_t null_lseek(struct file *file, loff_t offset, int orig)
{
	file->f_pos = 0;
	return 0;
}

/*
 * The memory devices use the full 32/64 bits of the offset, and so we cannot
 * check against negative addresses: they are ok. The return value is weird,
 * though, in that case (0).
 *
 * also note that seeking relative to the "end of file" isn't supported:
 * it has no meaning, so it returns -EINVAL.
 */
A
Andrew Morton 已提交
705
/*
 * llseek handler for /dev/mem, /dev/kmem and /dev/port.  Runs under the
 * inode's i_mutex.
 *
 * SEEK_SET and SEEK_CUR are supported; any other @orig yields -EINVAL.  A
 * resulting offset in the top 4096 values of the unsigned range is refused
 * with -EOVERFLOW.  On success the new position is stored in f_pos and
 * returned.
 */
static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
{
	struct mutex *lock = &file->f_path.dentry->d_inode->i_mutex;
	loff_t ret;

	mutex_lock(lock);
	if (orig != SEEK_CUR && orig != SEEK_SET) {
		ret = -EINVAL;
		goto out;
	}

	/* a relative seek becomes an absolute one, then shares its checks */
	if (orig == SEEK_CUR)
		offset += file->f_pos;

	/* to avoid userland mistaking f_pos=-9 as -EBADF=-9 */
	if ((unsigned long long)offset >= ~0xFFFULL) {
		ret = -EOVERFLOW;
		goto out;
	}

	file->f_pos = offset;
	ret = file->f_pos;
	force_successful_syscall_return();
out:
	mutex_unlock(lock);
	return ret;
}

/*
 * open() handler for /dev/port; also used for /dev/mem, /dev/kmem and
 * /dev/oldmem through the open_* aliases.  Requires CAP_SYS_RAWIO and
 * returns -EPERM without it.
 */
static int open_port(struct inode *inode, struct file *filp)
{
	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	return 0;
}

/* Handlers that several devices share: each name aliases an existing one. */
#define zero_lseek	null_lseek
#define full_lseek	null_lseek
#define write_zero	write_null
#define read_full	read_zero
#define open_mem	open_port
#define open_kmem	open_mem
#define open_oldmem	open_mem
L
Linus Torvalds 已提交
742

743
static const struct file_operations mem_fops = {
L
Linus Torvalds 已提交
744 745 746 747 748
	.llseek		= memory_lseek,
	.read		= read_mem,
	.write		= write_mem,
	.mmap		= mmap_mem,
	.open		= open_mem,
749
	.get_unmapped_area = get_unmapped_area_mem,
L
Linus Torvalds 已提交
750 751
};

752
#ifdef CONFIG_DEVKMEM
753
static const struct file_operations kmem_fops = {
L
Linus Torvalds 已提交
754 755 756 757 758
	.llseek		= memory_lseek,
	.read		= read_kmem,
	.write		= write_kmem,
	.mmap		= mmap_kmem,
	.open		= open_kmem,
759
	.get_unmapped_area = get_unmapped_area_mem,
L
Linus Torvalds 已提交
760
};
761
#endif
L
Linus Torvalds 已提交
762

763
static const struct file_operations null_fops = {
L
Linus Torvalds 已提交
764 765 766
	.llseek		= null_lseek,
	.read		= read_null,
	.write		= write_null,
767
	.splice_write	= splice_write_null,
L
Linus Torvalds 已提交
768 769
};

770
#ifdef CONFIG_DEVPORT
771
static const struct file_operations port_fops = {
L
Linus Torvalds 已提交
772 773 774 775 776 777 778
	.llseek		= memory_lseek,
	.read		= read_port,
	.write		= write_port,
	.open		= open_port,
};
#endif

779
static const struct file_operations zero_fops = {
L
Linus Torvalds 已提交
780 781 782 783 784 785
	.llseek		= zero_lseek,
	.read		= read_zero,
	.write		= write_zero,
	.mmap		= mmap_zero,
};

786 787 788
/*
 * capabilities for /dev/zero
 * - permits private mappings, "copies" are taken of the source of zeros
 * - no writeback happens
 */
L
Linus Torvalds 已提交
791
static struct backing_dev_info zero_bdi = {
792
	.name		= "char/mem",
793
	.capabilities	= BDI_CAP_MAP_COPY | BDI_CAP_NO_ACCT_AND_WRITEBACK,
L
Linus Torvalds 已提交
794 795
};

796
static const struct file_operations full_fops = {
L
Linus Torvalds 已提交
797 798 799 800 801
	.llseek		= full_lseek,
	.read		= read_full,
	.write		= write_full,
};

802
#ifdef CONFIG_CRASH_DUMP
803
static const struct file_operations oldmem_fops = {
804 805
	.read	= read_oldmem,
	.open	= open_oldmem,
806
	.llseek = default_llseek,
807 808 809
};
#endif

810 811
/*
 * aio_write handler for /dev/kmsg: gather the @count user segments of @iv
 * into one NUL-terminated kernel string and emit it with a single printk().
 *
 * Returns printk()'s result, capped at the total length of the segments, or
 * -ENOMEM if the line buffer cannot be allocated, or -EFAULT if a segment
 * cannot be copied from user space.
 */
static ssize_t kmsg_writev(struct kiocb *iocb, const struct iovec *iv,
			   unsigned long count, loff_t pos)
{
	size_t len = iov_length(iv, count);
	ssize_t ret = -EFAULT;
	char *line, *dst;
	int i;

	line = kmalloc(len + 1, GFP_KERNEL);
	if (!line)
		return -ENOMEM;

	/*
	 * copy all vectors into a single string, to ensure we do
	 * not interleave our log line with other printk calls
	 */
	dst = line;
	for (i = 0; i < count; i++) {
		const struct iovec *seg = &iv[i];

		if (copy_from_user(dst, seg->iov_base, seg->iov_len))
			goto out;
		dst += seg->iov_len;
	}
	*dst = '\0';

	ret = printk("%s", line);
	/* printk can add a prefix */
	if (ret > len)
		ret = len;
out:
	kfree(line);
	return ret;
}

843
static const struct file_operations kmsg_fops = {
844
	.aio_write = kmsg_writev,
845
	.llseek = noop_llseek,
L
Linus Torvalds 已提交
846 847
};

848 849
static const struct memdev {
	const char *name;
850
	mode_t mode;
851 852 853
	const struct file_operations *fops;
	struct backing_dev_info *dev_info;
} devlist[] = {
854
	 [1] = { "mem", 0, &mem_fops, &directly_mappable_cdev_bdi },
855
#ifdef CONFIG_DEVKMEM
856
	 [2] = { "kmem", 0, &kmem_fops, &directly_mappable_cdev_bdi },
857
#endif
858
	 [3] = { "null", 0666, &null_fops, NULL },
859
#ifdef CONFIG_DEVPORT
860
	 [4] = { "port", 0, &port_fops, NULL },
L
Linus Torvalds 已提交
861
#endif
862 863 864 865 866
	 [5] = { "zero", 0666, &zero_fops, &zero_bdi },
	 [7] = { "full", 0666, &full_fops, NULL },
	 [8] = { "random", 0666, &random_fops, NULL },
	 [9] = { "urandom", 0666, &urandom_fops, NULL },
	[11] = { "kmsg", 0, &kmsg_fops, NULL },
867
#ifdef CONFIG_CRASH_DUMP
868
	[12] = { "oldmem", 0, &oldmem_fops, NULL },
869
#endif
870 871 872 873
};

static int memory_open(struct inode *inode, struct file *filp)
{
874 875
	int minor;
	const struct memdev *dev;
876

877 878
	minor = iminor(inode);
	if (minor >= ARRAY_SIZE(devlist))
879
		return -ENXIO;
880

881 882
	dev = &devlist[minor];
	if (!dev->fops)
883
		return -ENXIO;
884

885 886 887
	filp->f_op = dev->fops;
	if (dev->dev_info)
		filp->f_mapping->backing_dev_info = dev->dev_info;
888

889 890 891 892
	/* Is /dev/mem or /dev/kmem ? */
	if (dev->dev_info == &directly_mappable_cdev_bdi)
		filp->f_mode |= FMODE_UNSIGNED_OFFSET;

893
	if (dev->fops->open)
894 895 896
		return dev->fops->open(inode, filp);

	return 0;
L
Linus Torvalds 已提交
897 898
}

899
static const struct file_operations memory_fops = {
A
Andrew Morton 已提交
900
	.open = memory_open,
901
	.llseek = noop_llseek,
L
Linus Torvalds 已提交
902 903
};

904 905 906 907 908 909 910
/*
 * devnode callback of mem_class: when @mode is supplied and the device's
 * devlist entry has a non-zero mode, store that mode in *@mode.  Always
 * returns NULL.
 */
static char *mem_devnode(struct device *dev, mode_t *mode)
{
	if (mode) {
		mode_t wanted = devlist[MINOR(dev->devt)].mode;

		if (wanted)
			*mode = wanted;
	}

	return NULL;
}

911
static struct class *mem_class;
L
Linus Torvalds 已提交
912 913 914

/*
 * Boot-time setup of the memory character devices: initialise zero_bdi,
 * register the MEM_MAJOR character device, create the "mem" class and one
 * device per named devlist entry, then call tty_init().
 *
 * Returns the bdi_init() or class_create() error if either fails, otherwise
 * the result of tty_init().  A register_chrdev() failure is only logged.
 */
static int __init chr_dev_init(void)
{
	int minor;
	int err = bdi_init(&zero_bdi);

	if (err)
		return err;

	if (register_chrdev(MEM_MAJOR, "mem", &memory_fops))
		printk("unable to get major %d for memory devs\n", MEM_MAJOR);

	mem_class = class_create(THIS_MODULE, "mem");
	if (IS_ERR(mem_class))
		return PTR_ERR(mem_class);

	mem_class->devnode = mem_devnode;

	/* minor 0 is skipped; unnamed slots are holes in devlist */
	for (minor = 1; minor < ARRAY_SIZE(devlist); minor++) {
		if (devlist[minor].name)
			device_create(mem_class, NULL,
				      MKDEV(MEM_MAJOR, minor),
				      NULL, devlist[minor].name);
	}

	return tty_init();
}

fs_initcall(chr_dev_init);