mem.c 19.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 *  linux/drivers/char/mem.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
A
Andrew Morton 已提交
6
 *  Added devfs support.
L
Linus Torvalds 已提交
7
 *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
8
 *  Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
L
Linus Torvalds 已提交
9 10 11 12 13 14 15 16 17 18 19 20 21 22
 */

#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mman.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/raw.h>
#include <linux/tty.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/device.h>
23 24
#include <linux/highmem.h>
#include <linux/crash_dump.h>
L
Linus Torvalds 已提交
25
#include <linux/backing-dev.h>
26
#include <linux/bootmem.h>
27
#include <linux/splice.h>
28
#include <linux/pfn.h>
L
Linus Torvalds 已提交
29 30 31 32 33 34 35 36

#include <asm/uaccess.h>
#include <asm/io.h>

#ifdef CONFIG_IA64
# include <linux/efi.h>
#endif

37 38 39 40 41
static inline unsigned long size_inside_page(unsigned long start,
					     unsigned long size)
{
	unsigned long sz;

42
	sz = PAGE_SIZE - (start & (PAGE_SIZE - 1));
43

44
	return min(sz, size);
45 46
}

L
Linus Torvalds 已提交
47
#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
48
/*
 * Default range check used by read_mem()/write_mem(): accept the request
 * only if addr + count does not exceed the physical address of high_memory.
 */
static inline int valid_phys_addr_range(unsigned long addr, size_t count)
{
	unsigned long end = addr + count;

	return end <= __pa(high_memory);
}
52

53
/*
 * Default mmap range check: every (pfn, size) pair is accepted.  Only
 * compiled when ARCH_HAS_VALID_PHYS_ADDR_RANGE is not defined.
 */
static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
	return 1;
}
L
Linus Torvalds 已提交
57 58
#endif

59
#ifdef CONFIG_STRICT_DEVMEM
60
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
61
{
62 63 64 65 66 67 68 69
	u64 from = ((u64)pfn) << PAGE_SHIFT;
	u64 to = from + size;
	u64 cursor = from;

	while (cursor < to) {
		if (!devmem_is_allowed(pfn)) {
			printk(KERN_INFO
		"Program %s tried to access /dev/mem between %Lx->%Lx.\n",
70 71 72
				current->comm, from, to);
			return 0;
		}
73 74
		cursor += PAGE_SIZE;
		pfn++;
75 76 77 78
	}
	return 1;
}
#else
79
/* Without CONFIG_STRICT_DEVMEM every range is allowed. */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	return 1;
}
#endif

A
Andrew Morton 已提交
85
/*
 * Weak default that does nothing.  read_mem() and write_mem() call it after
 * they are done with a pointer obtained from xlate_dev_mem_ptr().
 */
void __weak unxlate_dev_mem_ptr(unsigned long phys, void *addr)
{
}

L
Linus Torvalds 已提交
89
/*
A
Andrew Morton 已提交
90 91
 * This funcion reads the *physical* memory. The f_pos points directly to the
 * memory location.
L
Linus Torvalds 已提交
92
 */
A
Andrew Morton 已提交
93
static ssize_t read_mem(struct file *file, char __user *buf,
L
Linus Torvalds 已提交
94 95 96 97 98 99
			size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t read, sz;
	char *ptr;

100
	if (!valid_phys_addr_range(p, count))
L
Linus Torvalds 已提交
101 102 103 104 105
		return -EFAULT;
	read = 0;
#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
106
		sz = size_inside_page(p, count);
L
Linus Torvalds 已提交
107 108 109
		if (sz > 0) {
			if (clear_user(buf, sz))
				return -EFAULT;
A
Andrew Morton 已提交
110 111 112 113
			buf += sz;
			p += sz;
			count -= sz;
			read += sz;
L
Linus Torvalds 已提交
114 115 116 117 118
		}
	}
#endif

	while (count > 0) {
119 120
		unsigned long remaining;

121
		sz = size_inside_page(p, count);
L
Linus Torvalds 已提交
122

123 124 125
		if (!range_is_allowed(p >> PAGE_SHIFT, count))
			return -EPERM;

L
Linus Torvalds 已提交
126
		/*
A
Andrew Morton 已提交
127 128 129
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
L
Linus Torvalds 已提交
130 131
		 */
		ptr = xlate_dev_mem_ptr(p);
132 133
		if (!ptr)
			return -EFAULT;
L
Linus Torvalds 已提交
134

135
		remaining = copy_to_user(buf, ptr, sz);
136
		unxlate_dev_mem_ptr(p, ptr);
137 138
		if (remaining)
			return -EFAULT;
139

L
Linus Torvalds 已提交
140 141 142 143 144 145 146 147 148 149
		buf += sz;
		p += sz;
		count -= sz;
		read += sz;
	}

	*ppos += read;
	return read;
}

A
Andrew Morton 已提交
150
/*
 * Write to *physical* memory at the offset given by *ppos, one page at a
 * time.
 *
 * Returns the number of bytes written and advances *ppos by that amount.
 * If translating a page or copying from @buf fails after some bytes were
 * already written, the partial count is returned; if nothing was written
 * the result is -EFAULT.  Returns -EFAULT when valid_phys_addr_range()
 * rejects the range and -EPERM when range_is_allowed() refuses a page.
 */
static ssize_t write_mem(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t written = 0;
	ssize_t sz;
	unsigned long uncopied;
	void *ptr;

	if (!valid_phys_addr_range(p, count))
		return -EFAULT;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		/* Nothing is stored; the bytes are only counted as written. */
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}
#endif

	while (count > 0) {
		sz = size_inside_page(p, count);

		if (!range_is_allowed(p >> PAGE_SHIFT, sz))
			return -EPERM;

		/*
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
		 */
		ptr = xlate_dev_mem_ptr(p);
		if (!ptr) {
			if (written)
				break;
			return -EFAULT;
		}

		uncopied = copy_from_user(ptr, buf, sz);
		unxlate_dev_mem_ptr(p, ptr);
		if (uncopied) {
			/* Account for the part of this page that did land. */
			written += sz - uncopied;
			if (written)
				break;
			return -EFAULT;
		}

		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}

	*ppos += written;
	return written;
}

A
Andrew Morton 已提交
212
/*
 * Weak default that permits every mapping request.  mmap_mem() fails with
 * -EINVAL when this returns 0.
 */
int __weak phys_mem_access_prot_allowed(struct file *file,
	unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
{
	return 1;
}

218
#ifndef __HAVE_PHYS_MEM_ACCESS_PROT
A
Andrew Morton 已提交
219 220 221 222 223 224

/*
 * Architectures vary in how they handle caching for addresses
 * outside of main memory.
 *
 */
225
#ifdef pgprot_noncached
A
Andrew Morton 已提交
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
/*
 * Decide whether physical address @addr, accessed through @file, should be
 * mapped non-cached.  Returns non-zero for an uncached mapping.
 */
static int uncached_access(struct file *file, unsigned long addr)
{
#if defined(CONFIG_IA64)
	/*
	 * On ia64, we ignore O_DSYNC because we cannot tolerate memory
	 * attribute aliases.
	 */
	return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
#elif defined(CONFIG_MIPS)
	{
		extern int __uncached_access(struct file *file,
					     unsigned long addr);

		return __uncached_access(file, addr);
	}
#else
	/*
	 * Accesses at or above the physical address of high_memory, and
	 * accesses through a file pointer that was marked O_DSYNC, are done
	 * non-cached.
	 */
	if (file->f_flags & O_DSYNC)
		return 1;
	return addr >= __pa(high_memory);
#endif
}
252
#endif
A
Andrew Morton 已提交
253

254 255 256 257 258 259 260 261 262 263 264 265 266
/*
 * Default page-protection choice for /dev/mem mappings: when
 * pgprot_noncached is available and uncached_access() asks for it, return
 * the non-cached variant of @vma_prot; otherwise return @vma_prot unchanged.
 */
static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
				     unsigned long size, pgprot_t vma_prot)
{
#ifdef pgprot_noncached
	unsigned long phys = pfn << PAGE_SHIFT;

	if (uncached_access(file, phys))
		return pgprot_noncached(vma_prot);
#endif
	return vma_prot;
}
#endif

267 268 269 270 271 272 273 274 275
#ifndef CONFIG_MMU
static unsigned long get_unmapped_area_mem(struct file *file,
					   unsigned long addr,
					   unsigned long len,
					   unsigned long pgoff,
					   unsigned long flags)
{
	if (!valid_mmap_phys_addr_range(pgoff, len))
		return (unsigned long) -EINVAL;
276
	return pgoff << PAGE_SHIFT;
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
}

/*
 * can't do an in-place private mapping if there's no MMU, so only VMAs with
 * VM_MAYSHARE set are acceptable
 */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return vma->vm_flags & VM_MAYSHARE;
}
#else
#define get_unmapped_area_mem	NULL

/* With an MMU every mapping, private or shared, is acceptable. */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return 1;
}
#endif

293
/*
 * VMA operations installed by mmap_mem().  The only hook, .access, is set
 * when CONFIG_HAVE_IOREMAP_PROT is enabled.
 */
static const struct vm_operations_struct mmap_mem_ops = {
#ifdef CONFIG_HAVE_IOREMAP_PROT
	.access = generic_access_phys,
#endif
};

A
Andrew Morton 已提交
299
static int mmap_mem(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
300
{
301 302
	size_t size = vma->vm_end - vma->vm_start;

303
	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
304 305
		return -EINVAL;

306 307 308
	if (!private_mapping_ok(vma))
		return -ENOSYS;

309 310 311
	if (!range_is_allowed(vma->vm_pgoff, size))
		return -EPERM;

312 313 314 315
	if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
						&vma->vm_page_prot))
		return -EINVAL;

316
	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
317
						 size,
L
Linus Torvalds 已提交
318 319
						 vma->vm_page_prot);

320 321
	vma->vm_ops = &mmap_mem_ops;

L
Linus Torvalds 已提交
322 323 324 325
	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
	if (remap_pfn_range(vma,
			    vma->vm_start,
			    vma->vm_pgoff,
326
			    size,
327
			    vma->vm_page_prot)) {
L
Linus Torvalds 已提交
328
		return -EAGAIN;
329
	}
L
Linus Torvalds 已提交
330 331 332
	return 0;
}

333
#ifdef CONFIG_DEVKMEM
A
Andrew Morton 已提交
334
static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
335
{
L
Linus Torvalds 已提交
336 337
	unsigned long pfn;

338 339
	/* Turn a kernel-virtual address into a physical page frame */
	pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
L
Linus Torvalds 已提交
340

L
Linus Torvalds 已提交
341
	/*
A
Andrew Morton 已提交
342 343 344
	 * RED-PEN: on some architectures there is more mapped memory than
	 * available in mem_map which pfn_valid checks for. Perhaps should add a
	 * new macro here.
L
Linus Torvalds 已提交
345 346 347
	 *
	 * RED-PEN: vmalloc is not supported right now.
	 */
L
Linus Torvalds 已提交
348
	if (!pfn_valid(pfn))
L
Linus Torvalds 已提交
349
		return -EIO;
L
Linus Torvalds 已提交
350 351

	vma->vm_pgoff = pfn;
L
Linus Torvalds 已提交
352 353
	return mmap_mem(file, vma);
}
354
#endif
L
Linus Torvalds 已提交
355

356 357 358 359
#ifdef CONFIG_CRASH_DUMP
/*
 * Read memory corresponding to the old kernel.
 */
360
static ssize_t read_oldmem(struct file *file, char __user *buf,
361 362
				size_t count, loff_t *ppos)
{
363 364 365
	unsigned long pfn, offset;
	size_t read = 0, csize;
	int rc = 0;
366

M
Maneesh Soni 已提交
367
	while (count) {
368
		pfn = *ppos / PAGE_SIZE;
369 370
		if (pfn > saved_max_pfn)
			return read;
371

372 373 374 375 376
		offset = (unsigned long)(*ppos % PAGE_SIZE);
		if (count > PAGE_SIZE - offset)
			csize = PAGE_SIZE - offset;
		else
			csize = count;
377

378 379 380
		rc = copy_oldmem_page(pfn, buf, csize, offset, 1);
		if (rc < 0)
			return rc;
381 382 383 384 385 386 387 388
		buf += csize;
		*ppos += csize;
		read += csize;
		count -= csize;
	}
	return read;
}
#endif
L
Linus Torvalds 已提交
389

390
#ifdef CONFIG_DEVKMEM
L
Linus Torvalds 已提交
391 392 393
/*
 * This function reads the *virtual* memory as seen by the kernel.
 */
A
Andrew Morton 已提交
394
static ssize_t read_kmem(struct file *file, char __user *buf,
L
Linus Torvalds 已提交
395 396 397 398 399
			 size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t low_count, read, sz;
	char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
400
	int err = 0;
L
Linus Torvalds 已提交
401 402 403 404

	read = 0;
	if (p < (unsigned long) high_memory) {
		low_count = count;
A
Andrew Morton 已提交
405 406
		if (count > (unsigned long)high_memory - p)
			low_count = (unsigned long)high_memory - p;
L
Linus Torvalds 已提交
407 408 409 410

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
		/* we don't have page 0 mapped on sparc and m68k.. */
		if (p < PAGE_SIZE && low_count > 0) {
411 412
			sz = size_inside_page(p, low_count);
			if (clear_user(buf, sz))
L
Linus Torvalds 已提交
413
				return -EFAULT;
414 415 416 417 418
			buf += sz;
			p += sz;
			read += sz;
			low_count -= sz;
			count -= sz;
L
Linus Torvalds 已提交
419 420 421
		}
#endif
		while (low_count > 0) {
422
			sz = size_inside_page(p, low_count);
L
Linus Torvalds 已提交
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445

			/*
			 * On ia64 if a page has been mapped somewhere as
			 * uncached, then it must also be accessed uncached
			 * by the kernel or data corruption may occur
			 */
			kbuf = xlate_dev_kmem_ptr((char *)p);

			if (copy_to_user(buf, kbuf, sz))
				return -EFAULT;
			buf += sz;
			p += sz;
			read += sz;
			low_count -= sz;
			count -= sz;
		}
	}

	if (count > 0) {
		kbuf = (char *)__get_free_page(GFP_KERNEL);
		if (!kbuf)
			return -ENOMEM;
		while (count > 0) {
446
			sz = size_inside_page(p, count);
447 448 449 450
			if (!is_vmalloc_or_module_addr((void *)p)) {
				err = -ENXIO;
				break;
			}
451 452
			sz = vread(kbuf, (char *)p, sz);
			if (!sz)
L
Linus Torvalds 已提交
453
				break;
454
			if (copy_to_user(buf, kbuf, sz)) {
455 456
				err = -EFAULT;
				break;
L
Linus Torvalds 已提交
457
			}
458 459 460 461
			count -= sz;
			buf += sz;
			read += sz;
			p += sz;
L
Linus Torvalds 已提交
462 463 464
		}
		free_page((unsigned long)kbuf);
	}
465 466
	*ppos = p;
	return read ? read : err;
L
Linus Torvalds 已提交
467 468 469
}


A
Andrew Morton 已提交
470 471
/*
 * Helper for write_kmem(): copy @count bytes from @buf to kernel-virtual
 * address @p, one page at a time.
 *
 * Returns the number of bytes written and advances *ppos by that amount.
 * If a copy faults, the partial count is returned when it is non-zero,
 * otherwise -EFAULT.
 */
static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
				size_t count, loff_t *ppos)
{
	ssize_t written = 0;
	ssize_t sz;
	unsigned long uncopied;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		/* Nothing is stored; the bytes are only counted as written. */
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}
#endif

	while (count > 0) {
		char *ptr;

		sz = size_inside_page(p, count);

		/*
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
		 */
		ptr = xlate_dev_kmem_ptr((char *)p);

		uncopied = copy_from_user(ptr, buf, sz);
		if (uncopied) {
			/* Account for the part of this page that did land. */
			written += sz - uncopied;
			if (written)
				break;
			return -EFAULT;
		}
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}

	*ppos += written;
	return written;
}

/*
 * This function writes to the *virtual* memory as seen by the kernel.
 */
A
Andrew Morton 已提交
521
static ssize_t write_kmem(struct file *file, const char __user *buf,
L
Linus Torvalds 已提交
522 523 524 525 526 527
			  size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t wrote = 0;
	ssize_t virtr = 0;
	char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
528
	int err = 0;
L
Linus Torvalds 已提交
529 530

	if (p < (unsigned long) high_memory) {
531 532
		unsigned long to_write = min_t(unsigned long, count,
					       (unsigned long)high_memory - p);
533
		wrote = do_write_kmem(p, buf, to_write, ppos);
534 535
		if (wrote != to_write)
			return wrote;
L
Linus Torvalds 已提交
536 537 538 539 540 541 542 543 544 545
		p += wrote;
		buf += wrote;
		count -= wrote;
	}

	if (count > 0) {
		kbuf = (char *)__get_free_page(GFP_KERNEL);
		if (!kbuf)
			return wrote ? wrote : -ENOMEM;
		while (count > 0) {
546 547
			unsigned long sz = size_inside_page(p, count);
			unsigned long n;
L
Linus Torvalds 已提交
548

549 550 551 552
			if (!is_vmalloc_or_module_addr((void *)p)) {
				err = -ENXIO;
				break;
			}
553 554
			n = copy_from_user(kbuf, buf, sz);
			if (n) {
555 556
				err = -EFAULT;
				break;
L
Linus Torvalds 已提交
557
			}
558
			vwrite(kbuf, (char *)p, sz);
559 560 561 562
			count -= sz;
			buf += sz;
			virtr += sz;
			p += sz;
L
Linus Torvalds 已提交
563 564 565 566
		}
		free_page((unsigned long)kbuf);
	}

567 568
	*ppos = p;
	return virtr + wrote ? : err;
L
Linus Torvalds 已提交
569
}
570
#endif
L
Linus Torvalds 已提交
571

572
#ifdef CONFIG_DEVPORT
A
Andrew Morton 已提交
573
/*
 * Read bytes from consecutive I/O ports, starting at port *ppos, into @buf.
 * Reading stops after @count bytes or once the port number reaches 65536.
 *
 * Returns the number of bytes stored and sets *ppos to the next port, or
 * -EFAULT if @buf is not writable.
 */
static ssize_t read_port(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long port = *ppos;
	char __user *out = buf;

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	while (count-- > 0 && port < 65536) {
		if (__put_user(inb(port), out) < 0)
			return -EFAULT;
		port++;
		out++;
	}

	*ppos = port;
	return out - buf;
}

A
Andrew Morton 已提交
591
/*
 * Write bytes from @buf to consecutive I/O ports, starting at port *ppos.
 * Writing stops after @count bytes or once the port number reaches 65536.
 *
 * Returns the number of bytes consumed and sets *ppos to the next port.  A
 * fault on the very first byte, or an unreadable @buf, yields -EFAULT; a
 * later fault ends the transfer with the partial count.
 */
static ssize_t write_port(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	unsigned long port = *ppos;
	const char __user *in = buf;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	while (count-- > 0 && port < 65536) {
		char c;

		if (__get_user(c, in)) {
			if (in > buf)
				break;
			return -EFAULT;
		}
		outb(c, port);
		port++;
		in++;
	}

	*ppos = port;
	return in - buf;
}
#endif

A
Andrew Morton 已提交
615
/* Reading /dev/null always returns 0 bytes. */
static ssize_t read_null(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	return 0;
}

A
Andrew Morton 已提交
621
/* Writes are discarded: report all @count bytes as written. */
static ssize_t write_null(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	return count;
}

627 628 629 630 631 632
/*
 * Actor passed to splice_from_pipe() by splice_write_null(): reports the
 * whole descriptor length without touching the buffer.
 */
static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	return sd->len;
}

A
Andrew Morton 已提交
633
static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out,
634 635 636 637 638
				 loff_t *ppos, size_t len, unsigned int flags)
{
	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
}

A
Andrew Morton 已提交
639
/*
 * Fill @buf with @count zero bytes, at most one page per iteration.
 *
 * Returns the number of bytes cleared.  If nothing could be cleared the
 * result is -EFAULT; if a signal is pending before anything was cleared it
 * is -ERESTARTSYS.
 */
static ssize_t read_zero(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	size_t written = 0;

	if (!count)
		return 0;

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	while (count) {
		/* Just for latency reasons */
		size_t chunk = min_t(size_t, count, PAGE_SIZE);
		unsigned long unwritten = __clear_user(buf, chunk);

		written += chunk - unwritten;
		if (unwritten)
			break;
		if (signal_pending(current))
			return written ? written : -ERESTARTSYS;
		buf += chunk;
		count -= chunk;
		cond_resched();
	}
	return written ? written : -EFAULT;
}

A
Andrew Morton 已提交
670
static int mmap_zero(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
671
{
N
Nick Piggin 已提交
672
#ifndef CONFIG_MMU
L
Linus Torvalds 已提交
673
	return -ENOSYS;
N
Nick Piggin 已提交
674 675 676 677
#endif
	if (vma->vm_flags & VM_SHARED)
		return shmem_zero_setup(vma);
	return 0;
L
Linus Torvalds 已提交
678 679
}

A
Andrew Morton 已提交
680
/* Every write to /dev/full fails with -ENOSPC. */
static ssize_t write_full(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	return -ENOSPC;
}

/*
 * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
 * can fopen() both devices with "a" now.  This was previously impossible.
 * -- SRB.
 */
A
Andrew Morton 已提交
691
static loff_t null_lseek(struct file *file, loff_t offset, int orig)
L
Linus Torvalds 已提交
692 693 694 695 696 697 698 699 700 701 702 703
{
	return file->f_pos = 0;
}

/*
 * The memory devices use the full 32/64 bits of the offset, and so we cannot
 * check against negative addresses: they are ok. The return value is weird,
 * though, in that case (0).
 *
 * also note that seeking relative to the "end of file" isn't supported:
 * it has no meaning, so it returns -EINVAL.
 */
A
Andrew Morton 已提交
704
static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
L
Linus Torvalds 已提交
705 706 707
{
	loff_t ret;

708
	mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
L
Linus Torvalds 已提交
709
	switch (orig) {
A
Andrew Morton 已提交
710 711 712 713 714 715
	case SEEK_CUR:
		offset += file->f_pos;
	case SEEK_SET:
		/* to avoid userland mistaking f_pos=-9 as -EBADF=-9 */
		if ((unsigned long long)offset >= ~0xFFFULL) {
			ret = -EOVERFLOW;
L
Linus Torvalds 已提交
716
			break;
A
Andrew Morton 已提交
717 718 719 720 721 722 723
		}
		file->f_pos = offset;
		ret = file->f_pos;
		force_successful_syscall_return();
		break;
	default:
		ret = -EINVAL;
L
Linus Torvalds 已提交
724
	}
725
	mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
L
Linus Torvalds 已提交
726 727 728 729 730 731 732 733 734 735 736 737 738 739
	return ret;
}

/*
 * Open handler shared (through the open_* aliases) by the port and memory
 * devices: only callers with CAP_SYS_RAWIO may open; others get -EPERM.
 */
static int open_port(struct inode * inode, struct file * filp)
{
	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	return 0;
}

/* Handlers that are plain aliases of another device's implementation. */
#define zero_lseek	null_lseek
#define full_lseek      null_lseek
#define write_zero	write_null
#define read_full       read_zero
#define open_mem	open_port
#define open_kmem	open_mem
#define open_oldmem	open_mem
L
Linus Torvalds 已提交
741

742
static const struct file_operations mem_fops = {
L
Linus Torvalds 已提交
743 744 745 746 747
	.llseek		= memory_lseek,
	.read		= read_mem,
	.write		= write_mem,
	.mmap		= mmap_mem,
	.open		= open_mem,
748
	.get_unmapped_area = get_unmapped_area_mem,
L
Linus Torvalds 已提交
749 750
};

751
#ifdef CONFIG_DEVKMEM
752
static const struct file_operations kmem_fops = {
L
Linus Torvalds 已提交
753 754 755 756 757
	.llseek		= memory_lseek,
	.read		= read_kmem,
	.write		= write_kmem,
	.mmap		= mmap_kmem,
	.open		= open_kmem,
758
	.get_unmapped_area = get_unmapped_area_mem,
L
Linus Torvalds 已提交
759
};
760
#endif
L
Linus Torvalds 已提交
761

762
static const struct file_operations null_fops = {
L
Linus Torvalds 已提交
763 764 765
	.llseek		= null_lseek,
	.read		= read_null,
	.write		= write_null,
766
	.splice_write	= splice_write_null,
L
Linus Torvalds 已提交
767 768
};

769
#ifdef CONFIG_DEVPORT
770
static const struct file_operations port_fops = {
L
Linus Torvalds 已提交
771 772 773 774 775 776 777
	.llseek		= memory_lseek,
	.read		= read_port,
	.write		= write_port,
	.open		= open_port,
};
#endif

778
static const struct file_operations zero_fops = {
L
Linus Torvalds 已提交
779 780 781 782 783 784
	.llseek		= zero_lseek,
	.read		= read_zero,
	.write		= write_zero,
	.mmap		= mmap_zero,
};

785 786 787
/*
 * capabilities for /dev/zero
 * - permits private mappings, "copies" are taken of the source of zeros
788
 * - no writeback happens
789
 */
L
Linus Torvalds 已提交
790
static struct backing_dev_info zero_bdi = {
791
	.name		= "char/mem",
792
	.capabilities	= BDI_CAP_MAP_COPY | BDI_CAP_NO_ACCT_AND_WRITEBACK,
L
Linus Torvalds 已提交
793 794
};

795
static const struct file_operations full_fops = {
L
Linus Torvalds 已提交
796 797 798 799 800
	.llseek		= full_lseek,
	.read		= read_full,
	.write		= write_full,
};

801
#ifdef CONFIG_CRASH_DUMP
802
static const struct file_operations oldmem_fops = {
803 804
	.read	= read_oldmem,
	.open	= open_oldmem,
805
	.llseek = default_llseek,
806 807 808
};
#endif

809 810
/*
 * aio_write handler for the "kmsg" device: gather all @count iovec segments
 * into one NUL-terminated string and pass it to printk() in a single call.
 *
 * Returns printk()'s result, capped at the total iovec length, -ENOMEM if
 * the line buffer cannot be allocated, or -EFAULT if copying a segment from
 * user space fails.
 */
static ssize_t kmsg_writev(struct kiocb *iocb, const struct iovec *iv,
			   unsigned long count, loff_t pos)
{
	char *line, *dst;
	int i;
	ssize_t ret = -EFAULT;
	size_t len = iov_length(iv, count);

	/* One extra byte for the terminating NUL. */
	line = kmalloc(len + 1, GFP_KERNEL);
	if (line == NULL)
		return -ENOMEM;

	/*
	 * copy all vectors into a single string, to ensure we do
	 * not interleave our log line with other printk calls
	 */
	dst = line;
	for (i = 0; i < count; i++) {
		if (copy_from_user(dst, iv[i].iov_base, iv[i].iov_len))
			goto out;
		dst += iv[i].iov_len;
	}
	*dst = '\0';

	ret = printk("%s", line);
	/* printk can add a prefix */
	if (ret > len)
		ret = len;
out:
	kfree(line);
	return ret;
}

842
static const struct file_operations kmsg_fops = {
843
	.aio_write = kmsg_writev,
844
	.llseek = noop_llseek,
L
Linus Torvalds 已提交
845 846
};

847 848
static const struct memdev {
	const char *name;
849
	mode_t mode;
850 851 852
	const struct file_operations *fops;
	struct backing_dev_info *dev_info;
} devlist[] = {
853
	 [1] = { "mem", 0, &mem_fops, &directly_mappable_cdev_bdi },
854
#ifdef CONFIG_DEVKMEM
855
	 [2] = { "kmem", 0, &kmem_fops, &directly_mappable_cdev_bdi },
856
#endif
857
	 [3] = { "null", 0666, &null_fops, NULL },
858
#ifdef CONFIG_DEVPORT
859
	 [4] = { "port", 0, &port_fops, NULL },
L
Linus Torvalds 已提交
860
#endif
861 862 863 864 865
	 [5] = { "zero", 0666, &zero_fops, &zero_bdi },
	 [7] = { "full", 0666, &full_fops, NULL },
	 [8] = { "random", 0666, &random_fops, NULL },
	 [9] = { "urandom", 0666, &urandom_fops, NULL },
	[11] = { "kmsg", 0, &kmsg_fops, NULL },
866
#ifdef CONFIG_CRASH_DUMP
867
	[12] = { "oldmem", 0, &oldmem_fops, NULL },
868
#endif
869 870 871 872
};

static int memory_open(struct inode *inode, struct file *filp)
{
873 874
	int minor;
	const struct memdev *dev;
875

876 877
	minor = iminor(inode);
	if (minor >= ARRAY_SIZE(devlist))
878
		return -ENXIO;
879

880 881
	dev = &devlist[minor];
	if (!dev->fops)
882
		return -ENXIO;
883

884 885 886
	filp->f_op = dev->fops;
	if (dev->dev_info)
		filp->f_mapping->backing_dev_info = dev->dev_info;
887

888 889 890 891
	/* Is /dev/mem or /dev/kmem ? */
	if (dev->dev_info == &directly_mappable_cdev_bdi)
		filp->f_mode |= FMODE_UNSIGNED_OFFSET;

892
	if (dev->fops->open)
893 894 895
		return dev->fops->open(inode, filp);

	return 0;
L
Linus Torvalds 已提交
896 897
}

898
static const struct file_operations memory_fops = {
A
Andrew Morton 已提交
899
	.open = memory_open,
900
	.llseek = noop_llseek,
L
Linus Torvalds 已提交
901 902
};

903 904 905 906 907 908 909
/*
 * devnode callback for mem_class: if the caller wants a mode and the
 * devlist entry for this device's minor has a non-zero one, report it
 * through @mode.  Always returns NULL.
 */
static char *mem_devnode(struct device *dev, mode_t *mode)
{
	unsigned int minor;

	if (!mode)
		return NULL;

	minor = MINOR(dev->devt);
	if (devlist[minor].mode)
		*mode = devlist[minor].mode;
	return NULL;
}

910
/* Device class created by chr_dev_init() and used for every memory device. */
static struct class *mem_class;
L
Linus Torvalds 已提交
911 912 913

/*
 * Boot-time setup of the memory devices: initialize the /dev/zero backing
 * dev info, register the MEM_MAJOR character device, create the "mem" class
 * and one device per named devlist entry, then hand over to tty_init().
 *
 * A failure to get the major number is only logged.  Returns the error from
 * bdi_init() or class_create() if either fails, otherwise the result of
 * tty_init().
 */
static int __init chr_dev_init(void)
{
	int minor;
	int err = bdi_init(&zero_bdi);

	if (err)
		return err;

	if (register_chrdev(MEM_MAJOR, "mem", &memory_fops))
		printk("unable to get major %d for memory devs\n", MEM_MAJOR);

	mem_class = class_create(THIS_MODULE, "mem");
	if (IS_ERR(mem_class))
		return PTR_ERR(mem_class);

	mem_class->devnode = mem_devnode;

	/* Minor 0 is not used; unnamed entries are gaps in the table. */
	for (minor = 1; minor < ARRAY_SIZE(devlist); minor++) {
		if (devlist[minor].name)
			device_create(mem_class, NULL,
				      MKDEV(MEM_MAJOR, minor),
				      NULL, devlist[minor].name);
	}

	return tty_init();
}

/* Register chr_dev_init() through the fs_initcall() hook. */
fs_initcall(chr_dev_init);