mem.c 19.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5
/*
 *  linux/drivers/char/mem.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
A
Andrew Morton 已提交
6
 *  Added devfs support.
L
Linus Torvalds 已提交
7
 *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
8
 *  Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
L
Linus Torvalds 已提交
9 10 11 12 13 14 15 16 17 18 19 20 21 22
 */

#include <linux/mm.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mman.h>
#include <linux/random.h>
#include <linux/init.h>
#include <linux/raw.h>
#include <linux/tty.h>
#include <linux/capability.h>
#include <linux/ptrace.h>
#include <linux/device.h>
23 24
#include <linux/highmem.h>
#include <linux/crash_dump.h>
L
Linus Torvalds 已提交
25
#include <linux/backing-dev.h>
26
#include <linux/bootmem.h>
27
#include <linux/splice.h>
28
#include <linux/pfn.h>
L
Linus Torvalds 已提交
29 30 31 32 33 34 35 36

#include <asm/uaccess.h>
#include <asm/io.h>

#ifdef CONFIG_IA64
# include <linux/efi.h>
#endif

37 38 39 40 41
static inline unsigned long size_inside_page(unsigned long start,
					     unsigned long size)
{
	unsigned long sz;

42
	sz = PAGE_SIZE - (start & (PAGE_SIZE - 1));
43

44
	return min(sz, size);
45 46
}

L
Linus Torvalds 已提交
47
#ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
48
/*
 * Check that the physical range [addr, addr + count) ends at or below
 * __pa(high_memory).
 *
 * Returns 1 when the range is acceptable and 0 otherwise.
 */
static inline int valid_phys_addr_range(unsigned long addr, size_t count)
{
	/*
	 * A range whose end wraps around the address space would otherwise
	 * compare as a small value and be accepted below.
	 */
	if (addr + count < addr)
		return 0;

	if (addr + count > __pa(high_memory))
		return 0;

	return 1;
}
55

56
/*
 * Default mmap range check used when the architecture does not define
 * ARCH_HAS_VALID_PHYS_ADDR_RANGE: every range is accepted.
 */
static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
	return 1;
}
L
Linus Torvalds 已提交
60 61
#endif

62
#ifdef CONFIG_STRICT_DEVMEM
63
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
64
{
65 66 67 68 69 70 71 72
	u64 from = ((u64)pfn) << PAGE_SHIFT;
	u64 to = from + size;
	u64 cursor = from;

	while (cursor < to) {
		if (!devmem_is_allowed(pfn)) {
			printk(KERN_INFO
		"Program %s tried to access /dev/mem between %Lx->%Lx.\n",
73 74 75
				current->comm, from, to);
			return 0;
		}
76 77
		cursor += PAGE_SIZE;
		pfn++;
78 79 80 81
	}
	return 1;
}
#else
82
/* Without CONFIG_STRICT_DEVMEM no page frame is filtered out. */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	return 1;
}
#endif

A
Andrew Morton 已提交
88
/*
 * Weak default that does nothing.  read_mem() and write_mem() call this
 * after each copy with the pointer they got from xlate_dev_mem_ptr().
 */
void __weak unxlate_dev_mem_ptr(unsigned long phys, void *addr)
{
}

L
Linus Torvalds 已提交
92
/*
A
Andrew Morton 已提交
93 94
 * This funcion reads the *physical* memory. The f_pos points directly to the
 * memory location.
L
Linus Torvalds 已提交
95
 */
A
Andrew Morton 已提交
96
static ssize_t read_mem(struct file *file, char __user *buf,
L
Linus Torvalds 已提交
97 98 99 100 101 102
			size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t read, sz;
	char *ptr;

103
	if (!valid_phys_addr_range(p, count))
L
Linus Torvalds 已提交
104 105 106 107 108
		return -EFAULT;
	read = 0;
#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
109
		sz = size_inside_page(p, count);
L
Linus Torvalds 已提交
110 111 112
		if (sz > 0) {
			if (clear_user(buf, sz))
				return -EFAULT;
A
Andrew Morton 已提交
113 114 115 116
			buf += sz;
			p += sz;
			count -= sz;
			read += sz;
L
Linus Torvalds 已提交
117 118 119 120 121
		}
	}
#endif

	while (count > 0) {
122 123
		unsigned long remaining;

124
		sz = size_inside_page(p, count);
L
Linus Torvalds 已提交
125

126 127 128
		if (!range_is_allowed(p >> PAGE_SHIFT, count))
			return -EPERM;

L
Linus Torvalds 已提交
129
		/*
A
Andrew Morton 已提交
130 131 132
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
L
Linus Torvalds 已提交
133 134
		 */
		ptr = xlate_dev_mem_ptr(p);
135 136
		if (!ptr)
			return -EFAULT;
L
Linus Torvalds 已提交
137

138
		remaining = copy_to_user(buf, ptr, sz);
139
		unxlate_dev_mem_ptr(p, ptr);
140 141
		if (remaining)
			return -EFAULT;
142

L
Linus Torvalds 已提交
143 144 145 146 147 148 149 150 151 152
		buf += sz;
		p += sz;
		count -= sz;
		read += sz;
	}

	*ppos += read;
	return read;
}

A
Andrew Morton 已提交
153
/*
 * Write to *physical* memory through /dev/mem.  *ppos is the physical
 * address to start writing at and is advanced by the bytes written.
 *
 * Returns the number of bytes written, -EFBIG when the file position does
 * not fit in an unsigned long, -EFAULT when valid_phys_addr_range() rejects
 * the request or nothing could be translated/copied, and -EPERM when
 * range_is_allowed() rejects the page about to be written.
 */
static ssize_t write_mem(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t written, sz;
	unsigned long copied;
	void *ptr;

	/*
	 * loff_t may be wider than unsigned long; refuse a position that
	 * was truncated by the assignment above instead of writing to the
	 * aliased lower address.
	 */
	if (p != *ppos)
		return -EFBIG;

	if (!valid_phys_addr_range(p, count))
		return -EFAULT;

	written = 0;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		sz = size_inside_page(p, count);
		/* Hmm. Do something? */
		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}
#endif

	while (count > 0) {
		sz = size_inside_page(p, count);

		if (!range_is_allowed(p >> PAGE_SHIFT, sz))
			return -EPERM;

		/*
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
		 */
		ptr = xlate_dev_mem_ptr(p);
		if (!ptr) {
			/* Report what was already written, if anything. */
			if (written)
				break;
			return -EFAULT;
		}

		copied = copy_from_user(ptr, buf, sz);
		unxlate_dev_mem_ptr(p, ptr);
		if (copied) {
			/* copy_from_user() returned the bytes NOT copied. */
			written += sz - copied;
			if (written)
				break;
			return -EFAULT;
		}

		buf += sz;
		p += sz;
		count -= sz;
		written += sz;
	}

	*ppos += written;
	return written;
}

A
Andrew Morton 已提交
215
/*
 * Weak default consulted by mmap_mem() before the mapping is set up; it
 * accepts every request and leaves *vma_prot untouched.
 */
int __weak phys_mem_access_prot_allowed(struct file *file,
	unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
{
	return 1;
}

221
#ifndef __HAVE_PHYS_MEM_ACCESS_PROT
A
Andrew Morton 已提交
222 223 224 225 226 227

/*
 * Architectures vary in how they handle caching for addresses
 * outside of main memory.
 *
 */
228
#ifdef pgprot_noncached
A
Andrew Morton 已提交
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
/*
 * Decide whether physical address @addr, accessed through @file, should be
 * mapped non-cached.  Returns non-zero for "uncached".
 */
static int uncached_access(struct file *file, unsigned long addr)
{
#if defined(CONFIG_IA64)
	/*
	 * On ia64, we ignore O_DSYNC because we cannot tolerate memory
	 * attribute aliases.
	 */
	return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
#elif defined(CONFIG_MIPS)
	{
		extern int __uncached_access(struct file *file,
					     unsigned long addr);

		return __uncached_access(file, addr);
	}
#else
	/*
	 * Accessing memory above the top the kernel knows about or through a
	 * file pointer that was marked O_DSYNC will be done non-cached.
	 */
	return (file->f_flags & O_DSYNC) || addr >= __pa(high_memory);
#endif
}
255
#endif
A
Andrew Morton 已提交
256

257 258 259 260 261 262 263 264 265 266 267 268 269
/*
 * Return the page protection to use for mapping page frame @pfn: the
 * non-cached variant of @vma_prot when uncached_access() asks for it,
 * otherwise @vma_prot unchanged.
 */
static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
				     unsigned long size, pgprot_t vma_prot)
{
#ifdef pgprot_noncached
	unsigned long phys = pfn << PAGE_SHIFT;

	if (uncached_access(file, phys))
		vma_prot = pgprot_noncached(vma_prot);
#endif
	return vma_prot;
}
#endif

270 271 272 273 274 275 276 277 278
#ifndef CONFIG_MMU
static unsigned long get_unmapped_area_mem(struct file *file,
					   unsigned long addr,
					   unsigned long len,
					   unsigned long pgoff,
					   unsigned long flags)
{
	if (!valid_mmap_phys_addr_range(pgoff, len))
		return (unsigned long) -EINVAL;
279
	return pgoff << PAGE_SHIFT;
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
}

/*
 * can't do an in-place private mapping if there's no MMU: only mappings
 * with VM_MAYSHARE set are accepted.
 */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return vma->vm_flags & VM_MAYSHARE;
}
#else
#define get_unmapped_area_mem	NULL

/* CONFIG_MMU variant: every mapping is accepted. */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return 1;
}
#endif

296
/*
 * VMA operations installed by mmap_mem().  The only hook, .access, is set
 * when CONFIG_HAVE_IOREMAP_PROT is enabled.
 */
static const struct vm_operations_struct mmap_mem_ops = {
#ifdef CONFIG_HAVE_IOREMAP_PROT
	.access = generic_access_phys
#endif
};

A
Andrew Morton 已提交
302
static int mmap_mem(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
303
{
304 305
	size_t size = vma->vm_end - vma->vm_start;

306
	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
307 308
		return -EINVAL;

309 310 311
	if (!private_mapping_ok(vma))
		return -ENOSYS;

312 313 314
	if (!range_is_allowed(vma->vm_pgoff, size))
		return -EPERM;

315 316 317 318
	if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
						&vma->vm_page_prot))
		return -EINVAL;

319
	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
320
						 size,
L
Linus Torvalds 已提交
321 322
						 vma->vm_page_prot);

323 324
	vma->vm_ops = &mmap_mem_ops;

L
Linus Torvalds 已提交
325 326 327 328
	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
	if (remap_pfn_range(vma,
			    vma->vm_start,
			    vma->vm_pgoff,
329
			    size,
330
			    vma->vm_page_prot)) {
L
Linus Torvalds 已提交
331
		return -EAGAIN;
332
	}
L
Linus Torvalds 已提交
333 334 335
	return 0;
}

336
#ifdef CONFIG_DEVKMEM
A
Andrew Morton 已提交
337
static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
338
{
L
Linus Torvalds 已提交
339 340
	unsigned long pfn;

341 342
	/* Turn a kernel-virtual address into a physical page frame */
	pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
L
Linus Torvalds 已提交
343

L
Linus Torvalds 已提交
344
	/*
A
Andrew Morton 已提交
345 346 347
	 * RED-PEN: on some architectures there is more mapped memory than
	 * available in mem_map which pfn_valid checks for. Perhaps should add a
	 * new macro here.
L
Linus Torvalds 已提交
348 349 350
	 *
	 * RED-PEN: vmalloc is not supported right now.
	 */
L
Linus Torvalds 已提交
351
	if (!pfn_valid(pfn))
L
Linus Torvalds 已提交
352
		return -EIO;
L
Linus Torvalds 已提交
353 354

	vma->vm_pgoff = pfn;
L
Linus Torvalds 已提交
355 356
	return mmap_mem(file, vma);
}
357
#endif
L
Linus Torvalds 已提交
358

359 360 361 362
#ifdef CONFIG_CRASH_DUMP
/*
 * Read memory corresponding to the old kernel.
 */
363
static ssize_t read_oldmem(struct file *file, char __user *buf,
364 365
				size_t count, loff_t *ppos)
{
366 367 368
	unsigned long pfn, offset;
	size_t read = 0, csize;
	int rc = 0;
369

M
Maneesh Soni 已提交
370
	while (count) {
371
		pfn = *ppos / PAGE_SIZE;
372 373
		if (pfn > saved_max_pfn)
			return read;
374

375 376 377 378 379
		offset = (unsigned long)(*ppos % PAGE_SIZE);
		if (count > PAGE_SIZE - offset)
			csize = PAGE_SIZE - offset;
		else
			csize = count;
380

381 382 383
		rc = copy_oldmem_page(pfn, buf, csize, offset, 1);
		if (rc < 0)
			return rc;
384 385 386 387 388 389 390 391
		buf += csize;
		*ppos += csize;
		read += csize;
		count -= csize;
	}
	return read;
}
#endif
L
Linus Torvalds 已提交
392

393
#ifdef CONFIG_DEVKMEM
L
Linus Torvalds 已提交
394 395 396
/*
 * This function reads the *virtual* memory as seen by the kernel.
 */
A
Andrew Morton 已提交
397
static ssize_t read_kmem(struct file *file, char __user *buf,
L
Linus Torvalds 已提交
398 399 400 401 402
			 size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t low_count, read, sz;
	char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
403
	int err = 0;
L
Linus Torvalds 已提交
404 405 406 407

	read = 0;
	if (p < (unsigned long) high_memory) {
		low_count = count;
A
Andrew Morton 已提交
408 409
		if (count > (unsigned long)high_memory - p)
			low_count = (unsigned long)high_memory - p;
L
Linus Torvalds 已提交
410 411 412 413

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
		/* we don't have page 0 mapped on sparc and m68k.. */
		if (p < PAGE_SIZE && low_count > 0) {
414 415
			sz = size_inside_page(p, low_count);
			if (clear_user(buf, sz))
L
Linus Torvalds 已提交
416
				return -EFAULT;
417 418 419 420 421
			buf += sz;
			p += sz;
			read += sz;
			low_count -= sz;
			count -= sz;
L
Linus Torvalds 已提交
422 423 424
		}
#endif
		while (low_count > 0) {
425
			sz = size_inside_page(p, low_count);
L
Linus Torvalds 已提交
426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448

			/*
			 * On ia64 if a page has been mapped somewhere as
			 * uncached, then it must also be accessed uncached
			 * by the kernel or data corruption may occur
			 */
			kbuf = xlate_dev_kmem_ptr((char *)p);

			if (copy_to_user(buf, kbuf, sz))
				return -EFAULT;
			buf += sz;
			p += sz;
			read += sz;
			low_count -= sz;
			count -= sz;
		}
	}

	if (count > 0) {
		kbuf = (char *)__get_free_page(GFP_KERNEL);
		if (!kbuf)
			return -ENOMEM;
		while (count > 0) {
449
			sz = size_inside_page(p, count);
450 451 452 453
			if (!is_vmalloc_or_module_addr((void *)p)) {
				err = -ENXIO;
				break;
			}
454 455
			sz = vread(kbuf, (char *)p, sz);
			if (!sz)
L
Linus Torvalds 已提交
456
				break;
457
			if (copy_to_user(buf, kbuf, sz)) {
458 459
				err = -EFAULT;
				break;
L
Linus Torvalds 已提交
460
			}
461 462 463 464
			count -= sz;
			buf += sz;
			read += sz;
			p += sz;
L
Linus Torvalds 已提交
465 466 467
		}
		free_page((unsigned long)kbuf);
	}
468 469
	*ppos = p;
	return read ? read : err;
L
Linus Torvalds 已提交
470 471 472
}


A
Andrew Morton 已提交
473 474
/*
 * Copy @count bytes from user buffer @buf to kernel-virtual address @p,
 * one page-bounded chunk at a time, and advance *ppos by the amount
 * written.  Returns the bytes written, or -EFAULT when the very first copy
 * transfers nothing.
 */
static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
				size_t count, loff_t *ppos)
{
	ssize_t done = 0, chunk;
	unsigned long uncopied;

#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
	/* we don't have page 0 mapped on sparc and m68k.. */
	if (p < PAGE_SIZE) {
		chunk = size_inside_page(p, count);
		/* Hmm. Do something? */
		p += chunk;
		buf += chunk;
		done += chunk;
		count -= chunk;
	}
#endif

	while (count > 0) {
		char *dst;

		chunk = size_inside_page(p, count);

		/*
		 * On ia64 if a page has been mapped somewhere as uncached, then
		 * it must also be accessed uncached by the kernel or data
		 * corruption may occur.
		 */
		dst = xlate_dev_kmem_ptr((char *)p);

		uncopied = copy_from_user(dst, buf, chunk);
		if (uncopied) {
			/* Count the part that did make it across. */
			done += chunk - uncopied;
			if (!done)
				return -EFAULT;
			break;
		}
		p += chunk;
		buf += chunk;
		done += chunk;
		count -= chunk;
	}

	*ppos += done;
	return done;
}

/*
 * This function writes to the *virtual* memory as seen by the kernel.
 */
A
Andrew Morton 已提交
524
static ssize_t write_kmem(struct file *file, const char __user *buf,
L
Linus Torvalds 已提交
525 526 527 528 529 530
			  size_t count, loff_t *ppos)
{
	unsigned long p = *ppos;
	ssize_t wrote = 0;
	ssize_t virtr = 0;
	char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
531
	int err = 0;
L
Linus Torvalds 已提交
532 533

	if (p < (unsigned long) high_memory) {
534 535
		unsigned long to_write = min_t(unsigned long, count,
					       (unsigned long)high_memory - p);
536
		wrote = do_write_kmem(p, buf, to_write, ppos);
537 538
		if (wrote != to_write)
			return wrote;
L
Linus Torvalds 已提交
539 540 541 542 543 544 545 546 547 548
		p += wrote;
		buf += wrote;
		count -= wrote;
	}

	if (count > 0) {
		kbuf = (char *)__get_free_page(GFP_KERNEL);
		if (!kbuf)
			return wrote ? wrote : -ENOMEM;
		while (count > 0) {
549 550
			unsigned long sz = size_inside_page(p, count);
			unsigned long n;
L
Linus Torvalds 已提交
551

552 553 554 555
			if (!is_vmalloc_or_module_addr((void *)p)) {
				err = -ENXIO;
				break;
			}
556 557
			n = copy_from_user(kbuf, buf, sz);
			if (n) {
558 559
				err = -EFAULT;
				break;
L
Linus Torvalds 已提交
560
			}
561
			vwrite(kbuf, (char *)p, sz);
562 563 564 565
			count -= sz;
			buf += sz;
			virtr += sz;
			p += sz;
L
Linus Torvalds 已提交
566 567 568 569
		}
		free_page((unsigned long)kbuf);
	}

570 571
	*ppos = p;
	return virtr + wrote ? : err;
L
Linus Torvalds 已提交
572
}
573
#endif
L
Linus Torvalds 已提交
574

575
#ifdef CONFIG_DEVPORT
A
Andrew Morton 已提交
576
/*
 * Read I/O ports one byte at a time with inb(), starting at port *ppos and
 * stopping at port 65536.  Returns the number of bytes stored in @buf, or
 * -EFAULT when the user buffer cannot be written.
 */
static ssize_t read_port(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	unsigned long port = *ppos;
	char __user *dst = buf;

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	for (; count > 0 && port < 65536; count--, port++, dst++) {
		if (__put_user(inb(port), dst) < 0)
			return -EFAULT;
	}

	*ppos = port;
	return dst - buf;
}

A
Andrew Morton 已提交
594
/*
 * Write bytes from @buf to consecutive I/O ports with outb(), starting at
 * port *ppos and stopping at port 65536.  Returns the number of bytes
 * consumed, or -EFAULT when not even the first byte could be fetched.
 */
static ssize_t write_port(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	unsigned long port = *ppos;
	const char __user *src = buf;

	if (!access_ok(VERIFY_READ, buf, count))
		return -EFAULT;

	for (; count > 0 && port < 65536; count--, port++, src++) {
		char c;

		if (__get_user(c, src)) {
			/* Partial progress is reported as a short write. */
			if (src > buf)
				break;
			return -EFAULT;
		}
		outb(c, port);
	}

	*ppos = port;
	return src - buf;
}
#endif

A
Andrew Morton 已提交
618
/* Reading always reports end of file (0 bytes). */
static ssize_t read_null(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	return 0;
}

A
Andrew Morton 已提交
624
/* Writing discards the data and reports all @count bytes as written. */
static ssize_t write_null(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	return count;
}

630 631 632 633 634 635
/*
 * Actor passed to splice_from_pipe() by splice_write_null(): reports the
 * whole descriptor length as consumed without touching the buffer.
 */
static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	return sd->len;
}

A
Andrew Morton 已提交
636
static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out,
637 638 639 640 641
				 loff_t *ppos, size_t len, unsigned int flags)
{
	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
}

A
Andrew Morton 已提交
642
/*
 * Fill the user buffer with zero bytes, at most one page per iteration.
 *
 * Returns the number of bytes cleared; -EFAULT when the buffer is not
 * writable or nothing could be cleared; -ERESTARTSYS when a signal is
 * pending and nothing has been cleared yet.
 */
static ssize_t read_zero(struct file *file, char __user *buf,
			 size_t count, loff_t *ppos)
{
	size_t cleared = 0;

	if (!count)
		return 0;

	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;

	while (count) {
		/* Just for latency reasons */
		size_t chunk = min_t(size_t, count, PAGE_SIZE);
		unsigned long left = __clear_user(buf, chunk);

		cleared += chunk - left;
		if (left)
			break;
		if (signal_pending(current))
			return cleared ? cleared : -ERESTARTSYS;
		count -= chunk;
		buf += chunk;
		cond_resched();
	}

	return cleared ? cleared : -EFAULT;
}

A
Andrew Morton 已提交
673
static int mmap_zero(struct file *file, struct vm_area_struct *vma)
L
Linus Torvalds 已提交
674
{
N
Nick Piggin 已提交
675
#ifndef CONFIG_MMU
L
Linus Torvalds 已提交
676
	return -ENOSYS;
N
Nick Piggin 已提交
677 678 679 680
#endif
	if (vma->vm_flags & VM_SHARED)
		return shmem_zero_setup(vma);
	return 0;
L
Linus Torvalds 已提交
681 682
}

A
Andrew Morton 已提交
683
/* Every write fails with -ENOSPC. */
static ssize_t write_full(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	return -ENOSPC;
}

/*
 * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
 * can fopen() both devices with "a" now.  This was previously impossible.
 * -- SRB.
 */
A
Andrew Morton 已提交
694
static loff_t null_lseek(struct file *file, loff_t offset, int orig)
L
Linus Torvalds 已提交
695 696 697 698 699 700 701 702 703 704 705 706
{
	return file->f_pos = 0;
}

/*
 * The memory devices use the full 32/64 bits of the offset, and so we cannot
 * check against negative addresses: they are ok. The return value is weird,
 * though, in that case (0).
 *
 * also note that seeking relative to the "end of file" isn't supported:
 * it has no meaning, so it returns -EINVAL.
 */
A
Andrew Morton 已提交
707
static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
L
Linus Torvalds 已提交
708 709 710
{
	loff_t ret;

711
	mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
L
Linus Torvalds 已提交
712
	switch (orig) {
A
Andrew Morton 已提交
713 714 715 716 717 718
	case SEEK_CUR:
		offset += file->f_pos;
	case SEEK_SET:
		/* to avoid userland mistaking f_pos=-9 as -EBADF=-9 */
		if ((unsigned long long)offset >= ~0xFFFULL) {
			ret = -EOVERFLOW;
L
Linus Torvalds 已提交
719
			break;
A
Andrew Morton 已提交
720 721 722 723 724 725 726
		}
		file->f_pos = offset;
		ret = file->f_pos;
		force_successful_syscall_return();
		break;
	default:
		ret = -EINVAL;
L
Linus Torvalds 已提交
727
	}
728
	mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
L
Linus Torvalds 已提交
729 730 731 732 733 734 735 736 737 738 739 740 741 742
	return ret;
}

/* Opening requires CAP_SYS_RAWIO; without it the open fails with -EPERM. */
static int open_port(struct inode * inode, struct file * filp)
{
	if (capable(CAP_SYS_RAWIO))
		return 0;

	return -EPERM;
}

#define zero_lseek	null_lseek
#define full_lseek      null_lseek
#define write_zero	write_null
#define read_full       read_zero
#define open_mem	open_port
#define open_kmem	open_mem
743
#define open_oldmem	open_mem
L
Linus Torvalds 已提交
744

745
static const struct file_operations mem_fops = {
L
Linus Torvalds 已提交
746 747 748 749 750
	.llseek		= memory_lseek,
	.read		= read_mem,
	.write		= write_mem,
	.mmap		= mmap_mem,
	.open		= open_mem,
751
	.get_unmapped_area = get_unmapped_area_mem,
L
Linus Torvalds 已提交
752 753
};

754
#ifdef CONFIG_DEVKMEM
755
static const struct file_operations kmem_fops = {
L
Linus Torvalds 已提交
756 757 758 759 760
	.llseek		= memory_lseek,
	.read		= read_kmem,
	.write		= write_kmem,
	.mmap		= mmap_kmem,
	.open		= open_kmem,
761
	.get_unmapped_area = get_unmapped_area_mem,
L
Linus Torvalds 已提交
762
};
763
#endif
L
Linus Torvalds 已提交
764

765
static const struct file_operations null_fops = {
L
Linus Torvalds 已提交
766 767 768
	.llseek		= null_lseek,
	.read		= read_null,
	.write		= write_null,
769
	.splice_write	= splice_write_null,
L
Linus Torvalds 已提交
770 771
};

772
#ifdef CONFIG_DEVPORT
773
static const struct file_operations port_fops = {
L
Linus Torvalds 已提交
774 775 776 777 778 779 780
	.llseek		= memory_lseek,
	.read		= read_port,
	.write		= write_port,
	.open		= open_port,
};
#endif

781
static const struct file_operations zero_fops = {
L
Linus Torvalds 已提交
782 783 784 785 786 787
	.llseek		= zero_lseek,
	.read		= read_zero,
	.write		= write_zero,
	.mmap		= mmap_zero,
};

788 789 790
/*
 * capabilities for /dev/zero
 * - permits private mappings, "copies" are taken of the source of zeros
791
 * - no writeback happens
792
 */
L
Linus Torvalds 已提交
793
static struct backing_dev_info zero_bdi = {
794
	.name		= "char/mem",
795
	.capabilities	= BDI_CAP_MAP_COPY | BDI_CAP_NO_ACCT_AND_WRITEBACK,
L
Linus Torvalds 已提交
796 797
};

798
static const struct file_operations full_fops = {
L
Linus Torvalds 已提交
799 800 801 802 803
	.llseek		= full_lseek,
	.read		= read_full,
	.write		= write_full,
};

804
#ifdef CONFIG_CRASH_DUMP
805
static const struct file_operations oldmem_fops = {
806 807
	.read	= read_oldmem,
	.open	= open_oldmem,
808
	.llseek = default_llseek,
809 810 811
};
#endif

A
Andrew Morton 已提交
812
/*
 * Write hook of kmsg_fops: copies the user data into a NUL-terminated
 * kernel buffer and passes it to printk().
 *
 * Returns the printk() result capped at @count, -ENOMEM when the buffer
 * cannot be allocated, or -EFAULT when the user data cannot be copied.
 */
static ssize_t kmsg_write(struct file *file, const char __user *buf,
			  size_t count, loff_t *ppos)
{
	ssize_t ret = -EFAULT;
	char *line;

	line = kmalloc(count + 1, GFP_KERNEL);
	if (!line)
		return -ENOMEM;

	if (copy_from_user(line, buf, count))
		goto out;

	line[count] = 0;
	ret = printk("%s", line);
	/* printk can add a prefix */
	if (ret > count)
		ret = count;
out:
	kfree(line);
	return ret;
}

833
static const struct file_operations kmsg_fops = {
A
Andrew Morton 已提交
834
	.write = kmsg_write,
835
	.llseek = noop_llseek,
L
Linus Torvalds 已提交
836 837
};

838 839
static const struct memdev {
	const char *name;
840
	mode_t mode;
841 842 843
	const struct file_operations *fops;
	struct backing_dev_info *dev_info;
} devlist[] = {
844
	 [1] = { "mem", 0, &mem_fops, &directly_mappable_cdev_bdi },
845
#ifdef CONFIG_DEVKMEM
846
	 [2] = { "kmem", 0, &kmem_fops, &directly_mappable_cdev_bdi },
847
#endif
848
	 [3] = { "null", 0666, &null_fops, NULL },
849
#ifdef CONFIG_DEVPORT
850
	 [4] = { "port", 0, &port_fops, NULL },
L
Linus Torvalds 已提交
851
#endif
852 853 854 855 856
	 [5] = { "zero", 0666, &zero_fops, &zero_bdi },
	 [7] = { "full", 0666, &full_fops, NULL },
	 [8] = { "random", 0666, &random_fops, NULL },
	 [9] = { "urandom", 0666, &urandom_fops, NULL },
	[11] = { "kmsg", 0, &kmsg_fops, NULL },
857
#ifdef CONFIG_CRASH_DUMP
858
	[12] = { "oldmem", 0, &oldmem_fops, NULL },
859
#endif
860 861 862 863
};

static int memory_open(struct inode *inode, struct file *filp)
{
864 865
	int minor;
	const struct memdev *dev;
866

867 868
	minor = iminor(inode);
	if (minor >= ARRAY_SIZE(devlist))
869
		return -ENXIO;
870

871 872
	dev = &devlist[minor];
	if (!dev->fops)
873
		return -ENXIO;
874

875 876 877
	filp->f_op = dev->fops;
	if (dev->dev_info)
		filp->f_mapping->backing_dev_info = dev->dev_info;
878

879 880 881 882
	/* Is /dev/mem or /dev/kmem ? */
	if (dev->dev_info == &directly_mappable_cdev_bdi)
		filp->f_mode |= FMODE_UNSIGNED_OFFSET;

883
	if (dev->fops->open)
884 885 886
		return dev->fops->open(inode, filp);

	return 0;
L
Linus Torvalds 已提交
887 888
}

889
static const struct file_operations memory_fops = {
A
Andrew Morton 已提交
890
	.open = memory_open,
891
	.llseek = noop_llseek,
L
Linus Torvalds 已提交
892 893
};

894 895 896 897 898 899 900
/*
 * devnode callback of mem_class: stores the devlist[] mode for the
 * device's minor in *mode when that mode is non-zero.  Always returns
 * NULL.
 */
static char *mem_devnode(struct device *dev, mode_t *mode)
{
	if (mode) {
		mode_t wanted = devlist[MINOR(dev->devt)].mode;

		if (wanted)
			*mode = wanted;
	}

	return NULL;
}

901
static struct class *mem_class;
L
Linus Torvalds 已提交
902 903 904

/*
 * Initcall: initialises zero_bdi, registers the MEM_MAJOR character
 * device, creates the "mem" class with one device per named devlist[]
 * entry, and finishes by calling tty_init().
 *
 * Returns the error from bdi_init() or class_create() on failure,
 * otherwise the result of tty_init().  A register_chrdev() failure is only
 * logged.
 */
static int __init chr_dev_init(void)
{
	int minor;
	int ret = bdi_init(&zero_bdi);

	if (ret)
		return ret;

	if (register_chrdev(MEM_MAJOR, "mem", &memory_fops))
		printk("unable to get major %d for memory devs\n", MEM_MAJOR);

	mem_class = class_create(THIS_MODULE, "mem");
	if (IS_ERR(mem_class))
		return PTR_ERR(mem_class);

	mem_class->devnode = mem_devnode;

	/* devlist[] is sparse: only slots with a name get a device. */
	for (minor = 1; minor < ARRAY_SIZE(devlist); minor++) {
		if (devlist[minor].name)
			device_create(mem_class, NULL,
				      MKDEV(MEM_MAJOR, minor),
				      NULL, devlist[minor].name);
	}

	return tty_init();
}

fs_initcall(chr_dev_init);