diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 691e023695ad07e1ddda205e0d09c2678be8a0a2..679f2a8e22ee562833fbf8ce3715ec0331fcc3b4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -203,6 +203,14 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
 	return 0;
 }
 
+/* Flush the whole IO/TLB for a given protection domain */
+static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
+{
+	u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+
+	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
+}
+
 /****************************************************************************
  *
  * The functions below are used to create the page table mappings for
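For reference, the new helper rides on the existing command queue: INVALIDATE_IOMMU_PAGES with the size bit set and the all-ones magic address is how the hardware is told to drop every cached translation for one protection domain. A minimal sketch of that encoding follows; the struct iommu_cmd fields and CMD_* masks are assumptions taken from the rest of this driver, not something this patch adds.

	/* Illustrative sketch only -- how the "flush all pages" request is
	 * encoded before it is queued. Field and mask names are assumed
	 * from amd_iommu_types.h; this is not a literal copy of
	 * iommu_queue_inv_iommu_pages(). */
	static void sketch_flush_all_encoding(struct iommu_cmd *cmd, u16 domid)
	{
		u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS & PAGE_MASK;

		memset(cmd, 0, sizeof(*cmd));
		CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
		cmd->data[1] |= domid;                  /* domain to flush */
		cmd->data[2]  = lower_32_bits(address);
		cmd->data[3]  = upper_32_bits(address);
		/* s == 1: the size bit turns the address into a range; with
		 * the all-ones address that range is the whole domain. */
		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
	}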
@@ -386,14 +394,18 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
 			PAGE_SIZE) >> PAGE_SHIFT;
 	limit = limit < size ? limit : size;
 
-	if (dom->next_bit >= limit)
+	if (dom->next_bit >= limit) {
 		dom->next_bit = 0;
+		dom->need_flush = true;
+	}
 
 	address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
 			0, boundary_size, 0);
-	if (address == -1)
+	if (address == -1) {
 		address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
 				0, boundary_size, 0);
+		dom->need_flush = true;
+	}
 
 	if (likely(address != -1)) {
 		dom->next_bit = address + pages;
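The allocator hands out IO virtual addresses next-fit from the next_bit cursor, so an address freed behind the cursor can still have a live IO/TLB entry when it comes around again. Both paths that can make that happen now raise need_flush: the explicit cursor wrap, and the second-chance iommu_area_alloc() that restarts from bit 0. A toy model of the invariant, with hypothetical names and a single-word bitmap standing in for the real aperture:

	#include <stdbool.h>

	/* Toy next-fit allocator (not kernel code): one bit per page.
	 * The point is *when* the dirty flag must be raised, namely
	 * whenever an address below the cursor may be handed out. */
	struct toy_dom {
		unsigned long bitmap;    /* toy aperture, 64 pages */
		unsigned int  next_bit;  /* next-fit cursor        */
		bool          need_flush;
	};

	static int toy_alloc(struct toy_dom *dom)
	{
		unsigned int i;

		for (i = dom->next_bit; i < 8 * sizeof(dom->bitmap); i++) {
			if (!(dom->bitmap & (1UL << i)))
				goto found;
		}

		/* Wrapping may reuse a recently freed address whose old
		 * translation can still sit in the IO/TLB: mark the
		 * domain dirty before retrying from the bottom. */
		dom->need_flush = true;
		for (i = 0; i < dom->next_bit; i++) {
			if (!(dom->bitmap & (1UL << i)))
				goto found;
		}
		return -1;

	found:
		dom->bitmap |= 1UL << i;
		dom->next_bit = i + 1;
		return i;
	}

The real code does this over a multi-page bitmap via iommu_area_alloc(); the toy only shows where the dirty bit must be set.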
@@ -553,6 +565,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	dma_dom->bitmap[0] = 1;
 	dma_dom->next_bit = 0;
 
+	dma_dom->need_flush = false;
+
 	/* Initialize the exclusion range if necessary */
 	if (iommu->exclusion_start &&
 	    iommu->exclusion_start < dma_dom->aperture_size) {
@@ -795,7 +809,10 @@ static dma_addr_t __map_single(struct device *dev,
 	}
 	address += offset;
 
-	if (unlikely(iommu_has_npcache(iommu)))
+	if (unlikely(dma_dom->need_flush && !iommu_fullflush)) {
+		iommu_flush_tlb(iommu, dma_dom->domain.id);
+		dma_dom->need_flush = false;
+	} else if (unlikely(iommu_has_npcache(iommu)))
 		iommu_flush_pages(iommu, dma_dom->domain.id, address, size);
 
 out:
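The net effect on the map path: in lazy mode one whole-domain flush, taken only when the aperture has wrapped since the last flush, replaces the per-range flush that used to run on every unmap. The non-present-cache flush stays as the fallback for hardware that caches non-present translations. Restated as a standalone policy function (a sketch of the hunk above, not a literal copy of __map_single()):

	/* Sketch: flush policy applied after a new mapping is installed. */
	static void map_side_flush(struct amd_iommu *iommu,
				   struct dma_ops_domain *dma_dom,
				   dma_addr_t address, size_t size)
	{
		if (dma_dom->need_flush && !iommu_fullflush) {
			/* lazy mode and the aperture wrapped: pay for one
			 * whole-domain flush instead of many range flushes */
			iommu_flush_tlb(iommu, dma_dom->domain.id);
			dma_dom->need_flush = false;
		} else if (iommu_has_npcache(iommu)) {
			/* hardware may have cached the non-present entry
			 * for this range; invalidate it before first use */
			iommu_flush_pages(iommu, dma_dom->domain.id,
					  address, size);
		}
	}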
@@ -829,7 +846,8 @@ static void __unmap_single(struct amd_iommu *iommu,
 
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
-	iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
+	if (iommu_fullflush)
+		iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
 }
 
 /*
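With iommu_fullflush set, behaviour here is unchanged: every unmap invalidates its range immediately. In lazy mode the unmap only clears the page table entries and frees the addresses; a stale IO/TLB entry is tolerated because the address cannot be reused before the wrap-triggered flush in __map_single(). An illustrative timeline of one aperture cycle under lazy flushing:

	/*
	 * Lazy mode (iommu_fullflush == 0), illustrative only:
	 *
	 *   map(A)      -> PTE installed, no flush needed
	 *   unmap(A)    -> PTE cleared, address freed, NO flush;
	 *                  the IO/TLB may still translate A
	 *   ...allocator wraps, need_flush = true...
	 *   map(B == A) -> whole-domain flush runs first, so the
	 *                  stale entry for A is gone before B is used
	 */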
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index a69cc0f5204286a9ead45b580e2f506c00a88f2a..f2fa8dc81bebda03b6727f80633c5c1593f68fbf 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -995,6 +995,11 @@ int __init amd_iommu_init(void)
 	else
 		printk("disabled\n");
 
+	if (iommu_fullflush)
+		printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n");
+	else
+		printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n");
+
 out:
 	return ret;
 
@@ -1057,7 +1062,7 @@ void __init amd_iommu_detect(void)
 static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
-		if (strcmp(str, "isolate") == 0)
+		if (strncmp(str, "isolate", 7) == 0)
 			amd_iommu_isolate = 1;
 	}
 
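The parser loop advances str one character at a time, so the old strcmp() only matched when "isolate" sat flush against the end of the option string; any trailing text, such as a comma-separated option list, made every comparison fail. strncmp() matches the seven-character prefix wherever it starts. A small userspace demonstration, using a hypothetical option string:

	#include <stdio.h>
	#include <string.h>

	/* Hypothetical input: "amd_iommu=isolate,someflag" reaches the
	 * parser as "isolate,someflag". The scan mirrors the kernel loop. */
	int main(void)
	{
		const char *str = "isolate,someflag";
		int isolate = 0;

		for (; *str; ++str) {
			/* strcmp(str, "isolate") fails at every position
			 * because of the trailing ",someflag"; the prefix
			 * comparison succeeds once str points at 'i'. */
			if (strncmp(str, "isolate", 7) == 0)
				isolate = 1;
		}

		printf("isolate=%d\n", isolate);
		return 0;
	}

The prefix match is deliberately loose: it also fires if "isolate" merely appears inside a longer token, an accepted trade-off for this simple scanner.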
diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
index dcc812067394fd5e366810d0d57eb2dbf6f05d70..dcc472445ffd7d7fb44ec58ba80eef6927c62888 100644
--- a/include/asm-x86/amd_iommu_types.h
+++ b/include/asm-x86/amd_iommu_types.h
@@ -196,6 +196,9 @@ struct dma_ops_domain {
 	 * just calculate its address in constant time.
 	 */
 	u64 **pte_pages;
+
+	/* This will be set to true when the IO/TLB needs to be flushed */
+	bool need_flush;
 };
 
 /*