mem.c 19.2 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 *  linux/drivers/char/mem.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
A
Andrew Morton 已提交
6
 *  Added devfs support.
L
Linus Torvalds 已提交
7
 *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
8
 *  Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
L
Linus Torvalds 已提交
9 10 11 12 13 14 15 16 17 18 19 20 21 22
 */

#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mman.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/raw.h>
#include <linux/tty.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/device.h>
23 24
#include <linux/highmem.h>
#include <linux/crash_dump.h>
L
Linus Torvalds 已提交
25
#include <linux/backing-dev.h>
26
#include <linux/bootmem.h>
27
#include <linux/splice.h>
28
#include <linux/pfn.h>
L
Linus Torvalds 已提交
29 30 31 32 33 34 35 36

#include <asm/uaccess.h>
#include <asm/io.h>

#ifdef CONFIG_IA64
# include <linux/efi.h>
#endif

37 38 39 40 41
static inline unsigned long size_inside_page(unsigned long start,
					     unsigned long size)
{
	unsigned long sz;

42
	sz = PAGE_SIZE - (start & (PAGE_SIZE - 1));
43

44
	return min(sz, size);
45 46
}

L
Linus Torvalds 已提交
47
#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
48
/*
 * Check that the physical range [addr, addr + count) ends at or below
 * __pa(high_memory).
 *
 * The comparison is done by subtraction so that a large @addr combined
 * with @count cannot wrap around and slip past the limit.
 *
 * Returns 1 when the range is acceptable, 0 otherwise.
 */
static inline int valid_phys_addr_range(unsigned long addr, size_t count)
{
	unsigned long limit = __pa(high_memory);

	if (addr > limit || count > limit - addr)
		return 0;

	return 1;
}
55

56
/*
 * Fallback used when the architecture does not define
 * ARCH_HAS_VALID_PHYS_ADDR_RANGE: every mmap range is accepted.
 */
static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
	return 1;
}
L
Linus Torvalds 已提交
60 61
#endif

62
#ifdef CONFIG_STRICT_DEVMEM
63
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
64
{
65 66 67 68 69 70 71 72
	u64 from = ((u64)pfn) << PAGE_SHIFT;
	u64 to = from + size;
	u64 cursor = from;

	while (cursor < to) {
		if (!devmem_is_allowed(pfn)) {
			printk(KERN_INFO
		"Program %s tried to access /dev/mem between %Lx->%Lx.\n",
73 74 75
				current->comm, from, to);
			return 0;
		}
76 77
		cursor += PAGE_SIZE;
		pfn++;
78 79 80 81
	}
	return 1;
}
#else
82
/* Without CONFIG_STRICT_DEVMEM no range is ever refused. */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	return 1;
}
#endif

A
Andrew Morton 已提交
88
/*
 * Default, do-nothing counterpart to xlate_dev_mem_ptr().  It is declared
 * __weak so that another definition can take its place.
 */
void __weak unxlate_dev_mem_ptr(unsigned long phys, void *addr)
{
}

L
Linus Torvalds 已提交
92
/*
A
Andrew Morton 已提交
93 94
 * This funcion reads the *physical* memory. The f_pos points directly to the
 * memory location.
L
Linus Torvalds 已提交
95
 */
A
Andrew Morton 已提交
96
static ssize_t read_mem(struct file *file, char __user *buf,
L
Linus Torvalds 已提交
97 98 99 100 101 102
			size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t read, sz;
	char *ptr;

103
	if (!valid_phys_addr_range(p, count))
L
Linus Torvalds 已提交
104 105 106 107 108
		return -EFAULT;
	read = 0;
#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
109
		sz = size_inside_page(p, count);
L
Linus Torvalds 已提交
110 111 112
		if (sz > 0) {
			if (clear_user(buf, sz))
				return -EFAULT;
A
Andrew Morton 已提交
113 114 115 116
			buf += sz;
			p += sz;
			count -= sz;
			read += sz;
L
Linus Torvalds 已提交
117 118 119 120 121
		}
	}
#endif

	while (count > 0) {
122 123
		unsigned long remaining;

124
		sz = size_inside_page(p, count);
L
Linus Torvalds 已提交
125

126 127 128
		if (!range_is_allowed(p >> PAGE_SHIFT, count))
			return -EPERM;

L
Linus Torvalds 已提交
129
		/*
A
Andrew Morton 已提交
130 131 132
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
L
Linus Torvalds 已提交
133 134
		 */
		ptr = xlate_dev_mem_ptr(p);
135 136
		if (!ptr)
			return -EFAULT;
L
Linus Torvalds 已提交
137

138
		remaining = copy_to_user(buf, ptr, sz);
139
		unxlate_dev_mem_ptr(p, ptr);
140 141
		if (remaining)
			return -EFAULT;
142

L
Linus Torvalds 已提交
143 144 145 146 147 148 149 150 151 152
		buf += sz;
		p += sz;
		count -= sz;
		read += sz;
	}

	*ppos += read;
	return read;
}

A
Andrew Morton 已提交
153
/*
 * Write user data to *physical* memory at the offset held in *@ppos,
 * one page at a time.
 *
 * Returns the number of bytes written and advances *@ppos by that amount.
 * A failure after some bytes were written ends the transfer with a short
 * count, except that a page refused by range_is_allowed() always yields
 * -EPERM.  A failure before anything was written yields -EFAULT.
 */
static ssize_t write_mem(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	unsigned long copied;
	ssize_t written = 0;
	ssize_t sz;
	void *ptr;

	if (!valid_phys_addr_range(p, count))
		return -EFAULT;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		/* Hmm. Do something? */
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}
#endif

	while (count > 0) {
		sz = size_inside_page(p, count);

		if (!range_is_allowed(p >> PAGE_SHIFT, sz))
			return -EPERM;

		/*
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
		 */
		ptr = xlate_dev_mem_ptr(p);
		if (!ptr) {
			if (!written)
				return -EFAULT;
			break;
		}

		copied = copy_from_user(ptr, buf, sz);
		unxlate_dev_mem_ptr(p, ptr);
		if (copied) {
			/* account for the part of this page that did land */
			written += sz - copied;
			if (!written)
				return -EFAULT;
			break;
		}

		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}

	*ppos += written;
	return written;
}

A
Andrew Morton 已提交
215
/*
 * Default policy hook consulted by mmap_mem(): always answers "allowed".
 * Declared __weak so that another definition can take its place.
 */
int __weak phys_mem_access_prot_allowed(struct file *file,
	unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
{
	return 1;
}

221
#ifndef __HAVE_PHYS_MEM_ACCESS_PROT
A
Andrew Morton 已提交
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254

/*
 * Decide whether physical address @addr should be accessed uncached
 * through @file.  Architectures differ in how they treat caching for
 * addresses outside of main memory, hence the per-architecture branches.
 *
 * Returns non-zero when the access should be uncached.
 */
static int uncached_access(struct file *file, unsigned long addr)
{
#if defined(CONFIG_IA64)
	/*
	 * On ia64, we ignore O_DSYNC because we cannot tolerate memory
	 * attribute aliases.
	 */
	return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
#elif defined(CONFIG_MIPS)
	{
		extern int __uncached_access(struct file *file,
					     unsigned long addr);

		return __uncached_access(file, addr);
	}
#else
	/*
	 * Memory above the top of what the kernel knows about, or any
	 * access through a file pointer that was marked O_DSYNC, is done
	 * non-cached.
	 */
	return (file->f_flags & O_DSYNC) || addr >= __pa(high_memory);
#endif
}

255 256 257 258 259 260 261 262 263 264 265 266 267
/*
 * Pick the page protection for a mapping of page frame @pfn.  When the
 * architecture provides pgprot_noncached and uncached_access() asks for
 * it, the non-cached variant of @vma_prot is returned.  Otherwise
 * @vma_prot is returned unchanged.
 */
static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
				     unsigned long size, pgprot_t vma_prot)
{
#ifdef pgprot_noncached
	if (uncached_access(file, pfn << PAGE_SHIFT))
		vma_prot = pgprot_noncached(vma_prot);
#endif
	return vma_prot;
}
#endif

268 269 270 271 272 273 274 275 276
#ifndef CONFIG_MMU
static unsigned long get_unmapped_area_mem(struct file *file,
					   unsigned long addr,
					   unsigned long len,
					   unsigned long pgoff,
					   unsigned long flags)
{
	if (!valid_mmap_phys_addr_range(pgoff, len))
		return (unsigned long) -EINVAL;
277
	return pgoff << PAGE_SHIFT;
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
}

/*
 * Can't do an in-place private mapping if there's no MMU, so the mapping
 * is acceptable only when VM_MAYSHARE is set in the vma flags.
 * Returns non-zero when the mapping is acceptable.
 */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return vma->vm_flags & VM_MAYSHARE;
}
#else
#define get_unmapped_area_mem	NULL

/* With an MMU every mapping, private or shared, is acceptable. */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return 1;
}
#endif

294
/*
 * VMA operations installed by mmap_mem().  The .access handler is only
 * provided when CONFIG_HAVE_IOREMAP_PROT is set.
 */
static const struct vm_operations_struct mmap_mem_ops = {
#ifdef CONFIG_HAVE_IOREMAP_PROT
	.access = generic_access_phys
#endif
};

A
Andrew Morton 已提交
300
static int mmap_mem(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
301
{
302 303
	size_t size = vma->vm_end - vma->vm_start;

304
	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
305 306
		return -EINVAL;

307 308 309
	if (!private_mapping_ok(vma))
		return -ENOSYS;

310 311 312
	if (!range_is_allowed(vma->vm_pgoff, size))
		return -EPERM;

313 314 315 316
	if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
						&vma->vm_page_prot))
		return -EINVAL;

317
	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
318
						 size,
L
Linus Torvalds 已提交
319 320
						 vma->vm_page_prot);

321 322
	vma->vm_ops = &mmap_mem_ops;

L
Linus Torvalds 已提交
323 324 325 326
	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
	if (remap_pfn_range(vma,
			    vma->vm_start,
			    vma->vm_pgoff,
327
			    size,
328
			    vma->vm_page_prot)) {
L
Linus Torvalds 已提交
329
		return -EAGAIN;
330
	}
L
Linus Torvalds 已提交
331 332 333
	return 0;
}

334
#ifdef CONFIG_DEVKMEM
A
Andrew Morton 已提交
335
static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
336
{
L
Linus Torvalds 已提交
337 338
	unsigned long pfn;

339 340
	/* Turn a kernel-virtual address into a physical page frame */
	pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
L
Linus Torvalds 已提交
341

L
Linus Torvalds 已提交
342
	/*
A
Andrew Morton 已提交
343 344 345
	 * RED-PEN: on some architectures there is more mapped memory than
	 * available in mem_map which pfn_valid checks for. Perhaps should add a
	 * new macro here.
L
Linus Torvalds 已提交
346 347 348
	 *
	 * RED-PEN: vmalloc is not supported right now.
	 */
L
Linus Torvalds 已提交
349
	if (!pfn_valid(pfn))
L
Linus Torvalds 已提交
350
		return -EIO;
L
Linus Torvalds 已提交
351 352

	vma->vm_pgoff = pfn;
L
Linus Torvalds 已提交
353 354
	return mmap_mem(file, vma);
}
355
#endif
L
Linus Torvalds 已提交
356

357 358 359 360
#ifdef CONFIG_CRASH_DUMP
/*
 * Read memory corresponding to the old kernel.
 */
361
static ssize_t read_oldmem(struct file *file, char __user *buf,
362 363
				size_t count, loff_t *ppos)
{
364 365 366
	unsigned long pfn, offset;
	size_t read = 0, csize;
	int rc = 0;
367

M
Maneesh Soni 已提交
368
	while (count) {
369
		pfn = *ppos / PAGE_SIZE;
370 371
		if (pfn > saved_max_pfn)
			return read;
372

373 374 375 376 377
		offset = (unsigned long)(*ppos % PAGE_SIZE);
		if (count > PAGE_SIZE - offset)
			csize = PAGE_SIZE - offset;
		else
			csize = count;
378

379 380 381
		rc = copy_oldmem_page(pfn, buf, csize, offset, 1);
		if (rc < 0)
			return rc;
382 383 384 385 386 387 388 389
		buf += csize;
		*ppos += csize;
		read += csize;
		count -= csize;
	}
	return read;
}
#endif
L
Linus Torvalds 已提交
390

391
#ifdef CONFIG_DEVKMEM
L
Linus Torvalds 已提交
392 393 394
/*
 * This function reads the *virtual* memory as seen by the kernel.
 */
A
Andrew Morton 已提交
395
static ssize_t read_kmem(struct file *file, char __user *buf,
L
Linus Torvalds 已提交
396 397 398 399 400
			 size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t low_count, read, sz;
	char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
401
	int err = 0;
L
Linus Torvalds 已提交
402 403 404 405

	read = 0;
	if (p < (unsigned long) high_memory) {
		low_count = count;
A
Andrew Morton 已提交
406 407
		if (count > (unsigned long)high_memory - p)
			low_count = (unsigned long)high_memory - p;
L
Linus Torvalds 已提交
408 409 410 411

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
		/* we don't have page 0 mapped on sparc and m68k.. */
		if (p < PAGE_SIZE && low_count > 0) {
412 413
			sz = size_inside_page(p, low_count);
			if (clear_user(buf, sz))
L
Linus Torvalds 已提交
414
				return -EFAULT;
415 416 417 418 419
			buf += sz;
			p += sz;
			read += sz;
			low_count -= sz;
			count -= sz;
L
Linus Torvalds 已提交
420 421 422
		}
#endif
		while (low_count > 0) {
423
			sz = size_inside_page(p, low_count);
L
Linus Torvalds 已提交
424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446

			/*
			 * On ia64 if a page has been mapped somewhere as
			 * uncached, then it must also be accessed uncached
			 * by the kernel or data corruption may occur
			 */
			kbuf = xlate_dev_kmem_ptr((char *)p);

			if (copy_to_user(buf, kbuf, sz))
				return -EFAULT;
			buf += sz;
			p += sz;
			read += sz;
			low_count -= sz;
			count -= sz;
		}
	}

	if (count > 0) {
		kbuf = (char *)__get_free_page(GFP_KERNEL);
		if (!kbuf)
			return -ENOMEM;
		while (count > 0) {
447
			sz = size_inside_page(p, count);
448 449 450 451
			if (!is_vmalloc_or_module_addr((void *)p)) {
				err = -ENXIO;
				break;
			}
452 453
			sz = vread(kbuf, (char *)p, sz);
			if (!sz)
L
Linus Torvalds 已提交
454
				break;
455
			if (copy_to_user(buf, kbuf, sz)) {
456 457
				err = -EFAULT;
				break;
L
Linus Torvalds 已提交
458
			}
459 460 461 462
			count -= sz;
			buf += sz;
			read += sz;
			p += sz;
L
Linus Torvalds 已提交
463 464 465
		}
		free_page((unsigned long)kbuf);
	}
466 467
	*ppos = p;
	return read ? read : err;
L
Linus Torvalds 已提交
468 469 470
}


A
Andrew Morton 已提交
471 472
/*
 * Copy @count bytes from user space to the kernel address @p, one page
 * at a time.
 *
 * Returns the number of bytes written and advances *@ppos by that amount.
 * A failed copy after some bytes were written yields a short count.
 * A failed copy before anything was written yields -EFAULT.
 */
static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
				size_t count, loff_t *ppos)
{
	unsigned long copied;
	ssize_t written = 0;
	ssize_t sz;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		/* Hmm. Do something? */
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}
#endif

	while (count > 0) {
		char *ptr;

		sz = size_inside_page(p, count);

		/*
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
		 */
		ptr = xlate_dev_kmem_ptr((char *)p);

		copied = copy_from_user(ptr, buf, sz);
		if (copied) {
			/* keep whatever part of this page was written */
			written += sz - copied;
			if (!written)
				return -EFAULT;
			break;
		}
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}

	*ppos += written;
	return written;
}

/*
 * This function writes to the *virtual* memory as seen by the kernel.
 */
A
Andrew Morton 已提交
522
static ssize_t write_kmem(struct file *file, const char __user *buf,
L
Linus Torvalds 已提交
523 524 525 526 527 528
			  size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t wrote = 0;
	ssize_t virtr = 0;
	char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
529
	int err = 0;
L
Linus Torvalds 已提交
530 531

	if (p < (unsigned long) high_memory) {
532 533
		unsigned long to_write = min_t(unsigned long, count,
					       (unsigned long)high_memory - p);
534
		wrote = do_write_kmem(p, buf, to_write, ppos);
535 536
		if (wrote != to_write)
			return wrote;
L
Linus Torvalds 已提交
537 538 539 540 541 542 543 544 545 546
		p += wrote;
		buf += wrote;
		count -= wrote;
	}

	if (count > 0) {
		kbuf = (char *)__get_free_page(GFP_KERNEL);
		if (!kbuf)
			return wrote ? wrote : -ENOMEM;
		while (count > 0) {
547 548
			unsigned long sz = size_inside_page(p, count);
			unsigned long n;
L
Linus Torvalds 已提交
549

550 551 552 553
			if (!is_vmalloc_or_module_addr((void *)p)) {
				err = -ENXIO;
				break;
			}
554 555
			n = copy_from_user(kbuf, buf, sz);
			if (n) {
556 557
				err = -EFAULT;
				break;
L
Linus Torvalds 已提交
558
			}
559
			vwrite(kbuf, (char *)p, sz);
560 561 562 563
			count -= sz;
			buf += sz;
			virtr += sz;
			p += sz;
L
Linus Torvalds 已提交
564 565 566 567
		}
		free_page((unsigned long)kbuf);
	}

568 569
	*ppos = p;
	return virtr + wrote ? : err;
L
Linus Torvalds 已提交
570
}
571
#endif
L
Linus Torvalds 已提交
572

573
#ifdef CONFIG_DEVPORT
A
Andrew Morton 已提交
574
/*
 * Read bytes from consecutive I/O ports, starting at port *@ppos, into
 * @buf.  Reading stops after @count bytes or when the port number
 * reaches 65536.
 *
 * Returns the number of bytes stored and sets *@ppos to the next port,
 * or -EFAULT if the user buffer is not writable.
 */
static ssize_t read_port(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long port = *ppos;
	char __user *out = buf;

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	for (; count-- > 0 && port < 65536; port++, out++) {
		if (__put_user(inb(port), out) < 0)
			return -EFAULT;
	}

	*ppos = port;
	return out - buf;
}

A
Andrew Morton 已提交
592
/*
 * Write bytes from @buf to consecutive I/O ports, starting at port
 * *@ppos.  Writing stops after @count bytes or when the port number
 * reaches 65536.
 *
 * Returns the number of bytes written and sets *@ppos to the next port.
 * A fault on the very first byte, or an unreadable buffer, yields -EFAULT.
 * A later fault just ends the transfer early.
 */
static ssize_t write_port(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	unsigned long port = *ppos;
	const char __user *in = buf;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	for (; count-- > 0 && port < 65536; port++, in++) {
		char c;

		if (__get_user(c, in)) {
			if (in <= buf)
				return -EFAULT;
			break;
		}
		outb(c, port);
	}

	*ppos = port;
	return in - buf;
}
#endif

A
Andrew Morton 已提交
616
/* Read handler that never produces data: always reports 0 bytes read. */
static ssize_t read_null(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	return 0;
}

A
Andrew Morton 已提交
622
/*
 * Write handler that discards its input: the buffer is never touched and
 * the full @count is reported as written.
 */
static ssize_t write_null(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	return count;
}

628 629 630 631 632 633
/*
 * Actor passed to splice_from_pipe() by splice_write_null(): reports the
 * whole descriptor length as handled without looking at the pipe buffer.
 */
static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	return sd->len;
}

A
Andrew Morton 已提交
634
static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out,
635 636 637 638 639
				 loff_t *ppos, size_t len, unsigned int flags)
{
	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
}

A
Andrew Morton 已提交
640
/*
 * Fill the user buffer with zeroes, at most one page per step.
 *
 * Returns the number of bytes cleared.  If nothing could be cleared the
 * result is -EFAULT, or -ERESTARTSYS when a signal was pending.  A zero
 * @count returns 0 straight away.
 */
static ssize_t read_zero(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	size_t written = 0;

	if (!count)
		return 0;

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	while (count) {
		/* Just for latency reasons */
		size_t chunk = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		unsigned long unwritten = __clear_user(buf, chunk);

		written += chunk - unwritten;
		if (unwritten)
			break;
		if (signal_pending(current))
			return written ? written : -ERESTARTSYS;
		buf += chunk;
		count -= chunk;
		cond_resched();
	}
	return written ? written : -EFAULT;
}

A
Andrew Morton 已提交
671
static int mmap_zero(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
672
{
N
Nick Piggin 已提交
673
#ifndef CONFIG_MMU
L
Linus Torvalds 已提交
674
	return -ENOSYS;
N
Nick Piggin 已提交
675 676 677 678
#endif
	if (vma->vm_flags & VM_SHARED)
		return shmem_zero_setup(vma);
	return 0;
L
Linus Torvalds 已提交
679 680
}

A
Andrew Morton 已提交
681
/* Write handler that rejects every write with -ENOSPC. */
static ssize_t write_full(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	return -ENOSPC;
}

/*
 * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
 * can fopen() both devices with "a" now.  This was previously impossible.
 * -- SRB.
 */
A
Andrew Morton 已提交
692
static loff_t null_lseek(struct file *file, loff_t offset, int orig)
L
Linus Torvalds 已提交
693 694 695 696 697 698 699 700 701 702 703 704
{
	return file->f_pos = 0;
}

/*
 * The memory devices use the full 32/64 bits of the offset, and so we cannot
 * check against negative addresses: they are ok. The return value is weird,
 * though, in that case (0).
 *
 * also note that seeking relative to the "end of file" isn't supported:
 * it has no meaning, so it returns -EINVAL.
 */
A
Andrew Morton 已提交
705
static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
L
Linus Torvalds 已提交
706 707 708
{
	loff_t ret;

709
	mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
L
Linus Torvalds 已提交
710
	switch (orig) {
A
Andrew Morton 已提交
711 712 713 714 715 716
	case SEEK_CUR:
		offset += file->f_pos;
	case SEEK_SET:
		/* to avoid userland mistaking f_pos=-9 as -EBADF=-9 */
		if ((unsigned long long)offset >= ~0xFFFULL) {
			ret = -EOVERFLOW;
L
Linus Torvalds 已提交
717
			break;
A
Andrew Morton 已提交
718 719 720 721 722 723 724
		}
		file->f_pos = offset;
		ret = file->f_pos;
		force_successful_syscall_return();
		break;
	default:
		ret = -EINVAL;
L
Linus Torvalds 已提交
725
	}
726
	mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
L
Linus Torvalds 已提交
727 728 729 730 731 732 733 734 735 736 737 738 739 740
	return ret;
}

/*
 * Open handler that requires CAP_SYS_RAWIO.
 * Returns 0 when the caller has the capability, -EPERM otherwise.
 */
static int open_port(struct inode * inode, struct file * filp)
{
	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	return 0;
}

/*
 * Handlers shared between devices: each name on the left is an alias for
 * the implementation on the right.
 */
#define zero_lseek	null_lseek
#define full_lseek      null_lseek
#define write_zero	write_null
#define read_full       read_zero
#define open_mem	open_port
#define open_kmem	open_mem
741
#define open_oldmem	open_mem
L
Linus Torvalds 已提交
742

743
static const struct file_operations mem_fops = {
L
Linus Torvalds 已提交
744 745 746 747 748
	.llseek		= memory_lseek,
	.read		= read_mem,
	.write		= write_mem,
	.mmap		= mmap_mem,
	.open		= open_mem,
749
	.get_unmapped_area = get_unmapped_area_mem,
L
Linus Torvalds 已提交
750 751
};

752
#ifdef CONFIG_DEVKMEM
753
static const struct file_operations kmem_fops = {
L
Linus Torvalds 已提交
754 755 756 757 758
	.llseek		= memory_lseek,
	.read		= read_kmem,
	.write		= write_kmem,
	.mmap		= mmap_kmem,
	.open		= open_kmem,
759
	.get_unmapped_area = get_unmapped_area_mem,
L
Linus Torvalds 已提交
760
};
761
#endif
L
Linus Torvalds 已提交
762

763
static const struct file_operations null_fops = {
L
Linus Torvalds 已提交
764 765 766
	.llseek		= null_lseek,
	.read		= read_null,
	.write		= write_null,
767
	.splice_write	= splice_write_null,
L
Linus Torvalds 已提交
768 769
};

770
#ifdef CONFIG_DEVPORT
771
static const struct file_operations port_fops = {
L
Linus Torvalds 已提交
772 773 774 775 776 777 778
	.llseek		= memory_lseek,
	.read		= read_port,
	.write		= write_port,
	.open		= open_port,
};
#endif

779
static const struct file_operations zero_fops = {
L
Linus Torvalds 已提交
780 781 782 783 784 785
	.llseek		= zero_lseek,
	.read		= read_zero,
	.write		= write_zero,
	.mmap		= mmap_zero,
};

786 787 788 789
/*
 * capabilities for /dev/zero
 * - permits private mappings, "copies" are taken of the source of zeros
 */
L
Linus Torvalds 已提交
790
static struct backing_dev_info zero_bdi = {
791
	.name		= "char/mem",
L
Linus Torvalds 已提交
792 793 794
	.capabilities	= BDI_CAP_MAP_COPY,
};

795
static const struct file_operations full_fops = {
L
Linus Torvalds 已提交
796 797 798 799 800
	.llseek		= full_lseek,
	.read		= read_full,
	.write		= write_full,
};

801
#ifdef CONFIG_CRASH_DUMP
802
static const struct file_operations oldmem_fops = {
803 804 805 806 807
	.read	= read_oldmem,
	.open	= open_oldmem,
};
#endif

A
Andrew Morton 已提交
808
/*
 * Write handler that passes the user's text to printk().
 *
 * The data is copied into a NUL-terminated kernel buffer first.  Returns
 * printk()'s result, capped at @count, or -ENOMEM / -EFAULT when the
 * buffer cannot be allocated or filled.
 */
static ssize_t kmsg_write(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	ssize_t ret;
	char *tmp;

	tmp = kmalloc(count + 1, GFP_KERNEL);
	if (!tmp)
		return -ENOMEM;

	if (copy_from_user(tmp, buf, count)) {
		ret = -EFAULT;
		goto out;
	}

	tmp[count] = 0;
	ret = printk("%s", tmp);
	/* printk can add a prefix */
	if (ret > count)
		ret = count;
out:
	kfree(tmp);
	return ret;
}

829
static const struct file_operations kmsg_fops = {
A
Andrew Morton 已提交
830
	.write = kmsg_write,
L
Linus Torvalds 已提交
831 832
};

833 834
static const struct memdev {
	const char *name;
835
	mode_t mode;
836 837 838
	const struct file_operations *fops;
	struct backing_dev_info *dev_info;
} devlist[] = {
839
	 [1] = { "mem", 0, &mem_fops, &directly_mappable_cdev_bdi },
840
#ifdef CONFIG_DEVKMEM
841
	 [2] = { "kmem", 0, &kmem_fops, &directly_mappable_cdev_bdi },
842
#endif
843
	 [3] = { "null", 0666, &null_fops, NULL },
844
#ifdef CONFIG_DEVPORT
845
	 [4] = { "port", 0, &port_fops, NULL },
L
Linus Torvalds 已提交
846
#endif
847 848 849 850 851
	 [5] = { "zero", 0666, &zero_fops, &zero_bdi },
	 [7] = { "full", 0666, &full_fops, NULL },
	 [8] = { "random", 0666, &random_fops, NULL },
	 [9] = { "urandom", 0666, &urandom_fops, NULL },
	[11] = { "kmsg", 0, &kmsg_fops, NULL },
852
#ifdef CONFIG_CRASH_DUMP
853
	[12] = { "oldmem", 0, &oldmem_fops, NULL },
854
#endif
855 856 857 858
};

static int memory_open(struct inode *inode, struct file *filp)
{
859 860
	int minor;
	const struct memdev *dev;
861

862 863
	minor = iminor(inode);
	if (minor >= ARRAY_SIZE(devlist))
864
		return -ENXIO;
865

866 867
	dev = &devlist[minor];
	if (!dev->fops)
868
		return -ENXIO;
869

870 871 872
	filp->f_op = dev->fops;
	if (dev->dev_info)
		filp->f_mapping->backing_dev_info = dev->dev_info;
873

874
	if (dev->fops->open)
875 876 877
		return dev->fops->open(inode, filp);

	return 0;
L
Linus Torvalds 已提交
878 879
}

880
static const struct file_operations memory_fops = {
A
Andrew Morton 已提交
881
	.open = memory_open,
L
Linus Torvalds 已提交
882 883
};

884 885 886 887 888 889 890
/*
 * devnode callback for mem_class: when @mode is supplied and the device's
 * devlist[] entry carries a non-zero mode, store that mode in *@mode.
 * Always returns NULL.
 */
static char *mem_devnode(struct device *dev, mode_t *mode)
{
	const struct memdev *entry;

	if (!mode)
		return NULL;

	entry = &devlist[MINOR(dev->devt)];
	if (entry->mode)
		*mode = entry->mode;

	return NULL;
}

891
static struct class *mem_class;
L
Linus Torvalds 已提交
892 893 894

/*
 * Initcall: set up zero_bdi, register the MEM_MAJOR character device,
 * create the "mem" class and create one device per named devlist[] entry.
 *
 * A failure to register the major is only logged.  Returns 0 on success,
 * or the error from bdi_init() or class_create().
 */
static int __init chr_dev_init(void)
{
	int minor;
	int err = bdi_init(&zero_bdi);

	if (err)
		return err;

	if (register_chrdev(MEM_MAJOR, "mem", &memory_fops))
		printk("unable to get major %d for memory devs\n", MEM_MAJOR);

	mem_class = class_create(THIS_MODULE, "mem");
	if (IS_ERR(mem_class))
		return PTR_ERR(mem_class);

	mem_class->devnode = mem_devnode;

	/* slot 0 is never used; unnamed slots are gaps in the table */
	for (minor = 1; minor < ARRAY_SIZE(devlist); minor++) {
		if (devlist[minor].name)
			device_create(mem_class, NULL,
				      MKDEV(MEM_MAJOR, minor),
				      NULL, devlist[minor].name);
	}

	return 0;
}

fs_initcall(chr_dev_init);