diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 6e56af23957b6d835274139fd7859f193c4b82d0..3544f5d020c675e91079795f388ebd8235f8acc5 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -2519,6 +2519,20 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words)
 #endif /* __BIG_ENDIAN */
 }
 
+/**
+ *	ata_mmio_data_xfer - Transfer data by MMIO
+ *	@ap: port to read/write
+ *	@buf: data buffer
+ *	@buflen: buffer length
+ *	@do_write: read/write
+ *
+ *	Transfer data from/to the device data register by MMIO.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ *
+ */
+
 static void ata_mmio_data_xfer(struct ata_port *ap, unsigned char *buf,
 			       unsigned int buflen, int write_data)
 {
@@ -2527,6 +2541,7 @@ static void ata_mmio_data_xfer(struct ata_port *ap, unsigned char *buf,
 	u16 *buf16 = (u16 *) buf;
 	void __iomem *mmio = (void __iomem *)ap->ioaddr.data_addr;
 
+	/* Transfer multiple of 2 bytes */
 	if (write_data) {
 		for (i = 0; i < words; i++)
 			writew(le16_to_cpu(buf16[i]), mmio);
@@ -2534,19 +2549,76 @@ static void ata_mmio_data_xfer(struct ata_port *ap, unsigned char *buf,
 		for (i = 0; i < words; i++)
 			buf16[i] = cpu_to_le16(readw(mmio));
 	}
+
+	/* Transfer trailing 1 byte, if any. */
+	if (unlikely(buflen & 0x01)) {
+		u16 align_buf[1] = { 0 };
+		unsigned char *trailing_buf = buf + buflen - 1;
+
+		if (write_data) {
+			memcpy(align_buf, trailing_buf, 1);
+			writew(le16_to_cpu(align_buf[0]), mmio);
+		} else {
+			align_buf[0] = cpu_to_le16(readw(mmio));
+			memcpy(trailing_buf, align_buf, 1);
+		}
+	}
 }
 
+/**
+ *	ata_pio_data_xfer - Transfer data by PIO
+ *	@ap: port to read/write
+ *	@buf: data buffer
+ *	@buflen: buffer length
+ *	@do_write: read/write
+ *
+ *	Transfer data from/to the device data register by PIO.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ *
+ */
+
 static void ata_pio_data_xfer(struct ata_port *ap, unsigned char *buf,
 			      unsigned int buflen, int write_data)
 {
-	unsigned int dwords = buflen >> 1;
+	unsigned int words = buflen >> 1;
 
+	/* Transfer multiple of 2 bytes */
 	if (write_data)
-		outsw(ap->ioaddr.data_addr, buf, dwords);
+		outsw(ap->ioaddr.data_addr, buf, words);
 	else
-		insw(ap->ioaddr.data_addr, buf, dwords);
+		insw(ap->ioaddr.data_addr, buf, words);
+
+	/* Transfer trailing 1 byte, if any. */
+	if (unlikely(buflen & 0x01)) {
+		u16 align_buf[1] = { 0 };
+		unsigned char *trailing_buf = buf + buflen - 1;
+
+		if (write_data) {
+			memcpy(align_buf, trailing_buf, 1);
+			outw(le16_to_cpu(align_buf[0]), ap->ioaddr.data_addr);
+		} else {
+			align_buf[0] = cpu_to_le16(inw(ap->ioaddr.data_addr));
+			memcpy(trailing_buf, align_buf, 1);
+		}
+	}
 }
 
+/**
+ *	ata_data_xfer - Transfer data from/to the data register.
+ *	@ap: port to read/write
+ *	@buf: data buffer
+ *	@buflen: buffer length
+ *	@do_write: read/write
+ *
+ *	Transfer data from/to the device data register.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ *
+ */
+
 static void ata_data_xfer(struct ata_port *ap, unsigned char *buf,
 			  unsigned int buflen, int do_write)
 {
@@ -2556,6 +2628,16 @@ static void ata_data_xfer(struct ata_port *ap, unsigned char *buf,
 		ata_pio_data_xfer(ap, buf, buflen, do_write);
 }
 
+/**
+ *	ata_pio_sector - Transfer ATA_SECT_SIZE (512 bytes) of data.
+ *	@qc: Command on going
+ *
+ *	Transfer ATA_SECT_SIZE of data from/to the ATA device.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+
 static void ata_pio_sector(struct ata_queued_cmd *qc)
 {
 	int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
@@ -2594,6 +2676,18 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 	kunmap(page);
 }
 
+/**
+ *	__atapi_pio_bytes - Transfer data from/to the ATAPI device.
+ *	@qc: Command on going
+ *	@bytes: number of bytes
+ *
+ *	Transfer Transfer data from/to the ATAPI device.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ *
+ */
+
 static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
 {
 	int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
@@ -2603,10 +2697,33 @@ static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
 	unsigned char *buf;
 	unsigned int offset, count;
 
-	if (qc->curbytes == qc->nbytes - bytes)
+	if (qc->curbytes + bytes >= qc->nbytes)
 		ap->pio_task_state = PIO_ST_LAST;
 
 next_sg:
+	if (unlikely(qc->cursg >= qc->n_elem)) {
+		/* 
+		 * The end of qc->sg is reached and the device expects
+		 * more data to transfer. In order not to overrun qc->sg
+		 * and fulfill length specified in the byte count register,
+		 *    - for read case, discard trailing data from the device
+		 *    - for write case, padding zero data to the device
+		 */
+		u16 pad_buf[1] = { 0 };
+		unsigned int words = bytes >> 1;
+		unsigned int i;
+
+		if (words) /* warning if bytes > 1 */
+			printk(KERN_WARNING "ata%u: %u bytes trailing data\n", 
+			       ap->id, bytes);
+
+		for (i = 0; i < words; i++)
+			ata_data_xfer(ap, (unsigned char*)pad_buf, 2, do_write);
+
+		ap->pio_task_state = PIO_ST_LAST;
+		return;
+	}
+
 	sg = &qc->sg[qc->cursg];
 
 	page = sg->page;
@@ -2640,11 +2757,21 @@ static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
 
 	kunmap(page);
 
-	if (bytes) {
+	if (bytes)
 		goto next_sg;
-	}
 }
 
+/**
+ *	atapi_pio_bytes - Transfer data from/to the ATAPI device.
+ *	@qc: Command on going
+ *
+ *	Transfer Transfer data from/to the ATAPI device.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ *
+ */
+
 static void atapi_pio_bytes(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;