From b5a280c00840f4ebc2f40afb4f74e37e34d19f7c Mon Sep 17 00:00:00 2001
From: Le Tan <tamlokveer@gmail.com>
Date: Sat, 16 Aug 2014 13:55:44 +0800
Subject: [PATCH] intel-iommu: add IOTLB using hash table

Add IOTLB to cache information about the translation of input-addresses. IOTLB
use a GHashTable as cache. The key of the hash table is the logical-OR of gfn
and source id after left-shifting.

Signed-off-by: Le Tan <tamlokveer@gmail.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/i386/intel_iommu.c          | 213 ++++++++++++++++++++++++++++++++-
 hw/i386/intel_iommu_internal.h |  34 +++++-
 include/hw/i386/intel_iommu.h  |  11 +-
 3 files changed, 251 insertions(+), 7 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c514310e79..0a4282adf3 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -132,6 +132,35 @@ static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
     return new_val;
 }
 
+/* GHashTable functions */
+static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
+{
+    return *((const uint64_t *)v1) == *((const uint64_t *)v2);
+}
+
+static guint vtd_uint64_hash(gconstpointer v)
+{
+    return (guint)*(const uint64_t *)v;
+}
+
+static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
+                                          gpointer user_data)
+{
+    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+    uint16_t domain_id = *(uint16_t *)user_data;
+    return entry->domain_id == domain_id;
+}
+
+static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
+                                        gpointer user_data)
+{
+    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
+    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
+    uint64_t gfn = info->gfn & info->mask;
+    return (entry->domain_id == info->domain_id) &&
+            ((entry->gfn & info->mask) == gfn);
+}
+
 /* Reset all the gen of VTDAddressSpace to zero and set the gen of
  * IntelIOMMUState to 1.
  */
@@ -159,6 +188,48 @@ static void vtd_reset_context_cache(IntelIOMMUState *s)
     s->context_cache_gen = 1;
 }
 
+static void vtd_reset_iotlb(IntelIOMMUState *s)
+{
+    assert(s->iotlb);
+    g_hash_table_remove_all(s->iotlb);
+}
+
+static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
+                                       hwaddr addr)
+{
+    uint64_t key;
+
+    key = (addr >> VTD_PAGE_SHIFT_4K) |
+           ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT);
+    return g_hash_table_lookup(s->iotlb, &key);
+
+}
+
+static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
+                             uint16_t domain_id, hwaddr addr, uint64_t slpte,
+                             bool read_flags, bool write_flags)
+{
+    VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
+    uint64_t *key = g_malloc(sizeof(*key));
+    uint64_t gfn = addr >> VTD_PAGE_SHIFT_4K;
+
+    VTD_DPRINTF(CACHE, "update iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
+                " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr, slpte,
+                domain_id);
+    if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
+        VTD_DPRINTF(CACHE, "iotlb exceeds size limit, forced to reset");
+        vtd_reset_iotlb(s);
+    }
+
+    entry->gfn = gfn;
+    entry->domain_id = domain_id;
+    entry->slpte = slpte;
+    entry->read_flags = read_flags;
+    entry->write_flags = write_flags;
+    *key = gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT);
+    g_hash_table_replace(s->iotlb, key, entry);
+}
+
 /* Given the reg addr of both the message data and address, generate an
  * interrupt via MSI.
  */
@@ -693,6 +764,7 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num,
     bool is_fpd_set = false;
     bool reads = true;
     bool writes = true;
+    VTDIOTLBEntry *iotlb_entry;
 
     /* Check if the request is in interrupt address range */
     if (vtd_is_interrupt_addr(addr)) {
@@ -716,6 +788,17 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num,
             return;
         }
     }
+    /* Try to fetch slpte form IOTLB */
+    iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
+    if (iotlb_entry) {
+        VTD_DPRINTF(CACHE, "hit iotlb sid 0x%"PRIx16 " gpa 0x%"PRIx64
+                    " slpte 0x%"PRIx64 " did 0x%"PRIx16, source_id, addr,
+                    iotlb_entry->slpte, iotlb_entry->domain_id);
+        slpte = iotlb_entry->slpte;
+        reads = iotlb_entry->read_flags;
+        writes = iotlb_entry->write_flags;
+        goto out;
+    }
     /* Try to fetch context-entry from cache first */
     if (cc_entry->context_cache_gen == s->context_cache_gen) {
         VTD_DPRINTF(CACHE, "hit context-cache bus %d devfn %d "
@@ -760,6 +843,9 @@ static void vtd_do_iommu_translate(VTDAddressSpace *vtd_as, uint8_t bus_num,
         return;
     }
 
+    vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
+                     reads, writes);
+out:
     entry->iova = addr & VTD_PAGE_MASK_4K;
     entry->translated_addr = vtd_get_slpte_addr(slpte) & VTD_PAGE_MASK_4K;
     entry->addr_mask = ~VTD_PAGE_MASK_4K;
@@ -859,6 +945,29 @@ static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
     return caig;
 }
 
+static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
+{
+    vtd_reset_iotlb(s);
+}
+
+static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
+{
+    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
+                                &domain_id);
+}
+
+static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
+                                      hwaddr addr, uint8_t am)
+{
+    VTDIOTLBPageInvInfo info;
+
+    assert(am <= VTD_MAMV);
+    info.domain_id = domain_id;
+    info.gfn = addr >> VTD_PAGE_SHIFT_4K;
+    info.mask = ~((1 << am) - 1);
+    g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
+}
+
 /* Flush IOTLB
  * Returns the IOTLB Actual Invalidation Granularity.
  * @val: the content of the IOTLB_REG
@@ -867,25 +976,44 @@ static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
 {
     uint64_t iaig;
     uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK;
+    uint16_t domain_id;
+    hwaddr addr;
+    uint8_t am;
 
     switch (type) {
     case VTD_TLB_GLOBAL_FLUSH:
-        VTD_DPRINTF(INV, "Global IOTLB flush");
+        VTD_DPRINTF(INV, "global invalidation");
         iaig = VTD_TLB_GLOBAL_FLUSH_A;
+        vtd_iotlb_global_invalidate(s);
         break;
 
     case VTD_TLB_DSI_FLUSH:
-        VTD_DPRINTF(INV, "Domain-selective IOTLB flush");
+        domain_id = VTD_TLB_DID(val);
+        VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
+                    domain_id);
         iaig = VTD_TLB_DSI_FLUSH_A;
+        vtd_iotlb_domain_invalidate(s, domain_id);
         break;
 
     case VTD_TLB_PSI_FLUSH:
-        VTD_DPRINTF(INV, "Page-selective-within-domain IOTLB flush");
+        domain_id = VTD_TLB_DID(val);
+        addr = vtd_get_quad_raw(s, DMAR_IVA_REG);
+        am = VTD_IVA_AM(addr);
+        addr = VTD_IVA_ADDR(addr);
+        VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
+                    " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
+        if (am > VTD_MAMV) {
+            VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
+                        "%"PRIu8, (uint8_t)VTD_MAMV);
+            iaig = 0;
+            break;
+        }
         iaig = VTD_TLB_PSI_FLUSH_A;
+        vtd_iotlb_page_invalidate(s, domain_id, addr, am);
         break;
 
     default:
-        VTD_DPRINTF(GENERAL, "error: wrong iotlb flush granularity");
+        VTD_DPRINTF(GENERAL, "error: invalid granularity");
         iaig = 0;
     }
     return iaig;
@@ -1123,6 +1251,56 @@ static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
     return true;
 }
 
+static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
+{
+    uint16_t domain_id;
+    uint8_t am;
+    hwaddr addr;
+
+    if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
+        (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
+        VTD_DPRINTF(GENERAL, "error: non-zero reserved field in IOTLB "
+                    "Invalidate Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
+                    inv_desc->hi, inv_desc->lo);
+        return false;
+    }
+
+    switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
+    case VTD_INV_DESC_IOTLB_GLOBAL:
+        VTD_DPRINTF(INV, "global invalidation");
+        vtd_iotlb_global_invalidate(s);
+        break;
+
+    case VTD_INV_DESC_IOTLB_DOMAIN:
+        domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
+        VTD_DPRINTF(INV, "domain-selective invalidation domain 0x%"PRIx16,
+                    domain_id);
+        vtd_iotlb_domain_invalidate(s, domain_id);
+        break;
+
+    case VTD_INV_DESC_IOTLB_PAGE:
+        domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
+        addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
+        am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
+        VTD_DPRINTF(INV, "page-selective invalidation domain 0x%"PRIx16
+                    " addr 0x%"PRIx64 " mask %"PRIu8, domain_id, addr, am);
+        if (am > VTD_MAMV) {
+            VTD_DPRINTF(GENERAL, "error: supported max address mask value is "
+                        "%"PRIu8, (uint8_t)VTD_MAMV);
+            return false;
+        }
+        vtd_iotlb_page_invalidate(s, domain_id, addr, am);
+        break;
+
+    default:
+        VTD_DPRINTF(GENERAL, "error: invalid granularity in IOTLB Invalidate "
+                    "Descriptor hi 0x%"PRIx64 " lo 0x%"PRIx64,
+                    inv_desc->hi, inv_desc->lo);
+        return false;
+    }
+    return true;
+}
+
 static bool vtd_process_inv_desc(IntelIOMMUState *s)
 {
     VTDInvDesc inv_desc;
@@ -1149,6 +1327,9 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
     case VTD_INV_DESC_IOTLB:
         VTD_DPRINTF(INV, "IOTLB Invalidate Descriptor hi 0x%"PRIx64
                     " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+        if (!vtd_process_iotlb_desc(s, &inv_desc)) {
+            return false;
+        }
         break;
 
     case VTD_INV_DESC_WAIT:
@@ -1382,6 +1563,24 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
         vtd_handle_iotlb_write(s);
         break;
 
+    /* Invalidate Address Register, 64-bit */
+    case DMAR_IVA_REG:
+        VTD_DPRINTF(INV, "DMAR_IVA_REG write addr 0x%"PRIx64
+                    ", size %d, val 0x%"PRIx64, addr, size, val);
+        if (size == 4) {
+            vtd_set_long(s, addr, val);
+        } else {
+            vtd_set_quad(s, addr, val);
+        }
+        break;
+
+    case DMAR_IVA_REG_HI:
+        VTD_DPRINTF(INV, "DMAR_IVA_REG_HI write addr 0x%"PRIx64
+                    ", size %d, val 0x%"PRIx64, addr, size, val);
+        assert(size == 4);
+        vtd_set_long(s, addr, val);
+        break;
+
     /* Fault Status Register, 32-bit */
     case DMAR_FSTS_REG:
         VTD_DPRINTF(FLOG, "DMAR_FSTS_REG write addr 0x%"PRIx64
@@ -1658,10 +1857,11 @@ static void vtd_init(IntelIOMMUState *s)
     s->iq_last_desc_type = VTD_INV_DESC_NONE;
     s->next_frcd_reg = 0;
     s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
-             VTD_CAP_SAGAW;
+             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI;
     s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
 
     vtd_reset_context_cache(s);
+    vtd_reset_iotlb(s);
 
     /* Define registers with default values and bit semantics */
     vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
@@ -1731,6 +1931,9 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                           "intel_iommu", DMAR_REG_SIZE);
     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
+    /* No corresponding destroy */
+    s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
+                                     g_free, g_free);
     vtd_init(s);
 }
 
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 30c318dc9c..ba288ab1d9 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -111,6 +111,10 @@
 #define VTD_INTERRUPT_ADDR_FIRST    0xfee00000ULL
 #define VTD_INTERRUPT_ADDR_LAST     0xfeefffffULL
 
+/* The shift of source_id in the key of IOTLB hash table */
+#define VTD_IOTLB_SID_SHIFT         36
+#define VTD_IOTLB_MAX_SIZE          1024    /* Max size of the hash table */
+
 /* IOTLB_REG */
 #define VTD_TLB_GLOBAL_FLUSH        (1ULL << 60) /* Global invalidation */
 #define VTD_TLB_DSI_FLUSH           (2ULL << 60) /* Domain-selective */
@@ -121,6 +125,11 @@
 #define VTD_TLB_PSI_FLUSH_A         (3ULL << 57)
 #define VTD_TLB_FLUSH_GRANU_MASK_A  (3ULL << 57)
 #define VTD_TLB_IVT                 (1ULL << 63)
+#define VTD_TLB_DID(val)            (((val) >> 32) & VTD_DOMAIN_ID_MASK)
+
+/* IVA_REG */
+#define VTD_IVA_ADDR(val)       ((val) & ~0xfffULL & ((1ULL << VTD_MGAW) - 1))
+#define VTD_IVA_AM(val)         ((val) & 0x3fULL)
 
 /* GCMD_REG */
 #define VTD_GCMD_TE                 (1UL << 31)
@@ -176,6 +185,9 @@
 #define VTD_CAP_ND                  (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
 #define VTD_MGAW                    39  /* Maximum Guest Address Width */
 #define VTD_CAP_MGAW                (((VTD_MGAW - 1) & 0x3fULL) << 16)
+#define VTD_MAMV                    9ULL
+#define VTD_CAP_MAMV                (VTD_MAMV << 48)
+#define VTD_CAP_PSI                 (1ULL << 39)
 
 /* Supported Adjusted Guest Address Widths */
 #define VTD_CAP_SAGAW_SHIFT         8
@@ -293,6 +305,26 @@ typedef struct VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_CC_FM(val)         (((val) >> 48) & 3UL)
 #define VTD_INV_DESC_CC_RSVD            0xfffc00000000ffc0ULL
 
+/* Masks for IOTLB Invalidate Descriptor */
+#define VTD_INV_DESC_IOTLB_G            (3ULL << 4)
+#define VTD_INV_DESC_IOTLB_GLOBAL       (1ULL << 4)
+#define VTD_INV_DESC_IOTLB_DOMAIN       (2ULL << 4)
+#define VTD_INV_DESC_IOTLB_PAGE         (3ULL << 4)
+#define VTD_INV_DESC_IOTLB_DID(val)     (((val) >> 16) & VTD_DOMAIN_ID_MASK)
+#define VTD_INV_DESC_IOTLB_ADDR(val)    ((val) & ~0xfffULL & \
+                                         ((1ULL << VTD_MGAW) - 1))
+#define VTD_INV_DESC_IOTLB_AM(val)      ((val) & 0x3fULL)
+#define VTD_INV_DESC_IOTLB_RSVD_LO      0xffffffff0000ff00ULL
+#define VTD_INV_DESC_IOTLB_RSVD_HI      0xf80ULL
+
+/* Information about page-selective IOTLB invalidate */
+struct VTDIOTLBPageInvInfo {
+    uint16_t domain_id;
+    uint64_t gfn;
+    uint8_t mask;
+};
+typedef struct VTDIOTLBPageInvInfo VTDIOTLBPageInvInfo;
+
 /* Pagesize of VTD paging structures, including root and context tables */
 #define VTD_PAGE_SHIFT              12
 #define VTD_PAGE_SIZE               (1ULL << VTD_PAGE_SHIFT)
@@ -330,7 +362,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_CONTEXT_ENTRY_RSVD_LO   (0xff0ULL | ~VTD_HAW_MASK)
 /* hi */
 #define VTD_CONTEXT_ENTRY_AW        7ULL /* Adjusted guest-address-width */
-#define VTD_CONTEXT_ENTRY_DID       (0xffffULL << 8) /* Domain Identifier */
+#define VTD_CONTEXT_ENTRY_DID(val)  (((val) >> 8) & VTD_DOMAIN_ID_MASK)
 #define VTD_CONTEXT_ENTRY_RSVD_HI   0xffffffffff000080ULL
 
 #define VTD_CONTEXT_ENTRY_NR        (VTD_PAGE_SIZE / sizeof(VTDContextEntry))
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index d9a5215a15..f4701e1c60 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -48,7 +48,7 @@ typedef struct VTDContextEntry VTDContextEntry;
 typedef struct VTDContextCacheEntry VTDContextCacheEntry;
 typedef struct IntelIOMMUState IntelIOMMUState;
 typedef struct VTDAddressSpace VTDAddressSpace;
-
+typedef struct VTDIOTLBEntry VTDIOTLBEntry;
 
 /* Context-Entry */
 struct VTDContextEntry {
@@ -73,6 +73,14 @@ struct VTDAddressSpace {
     VTDContextCacheEntry context_cache_entry;
 };
 
+struct VTDIOTLBEntry {
+    uint64_t gfn;
+    uint16_t domain_id;
+    uint64_t slpte;
+    bool read_flags;
+    bool write_flags;
+};
+
 /* The iommu (DMAR) device state struct */
 struct IntelIOMMUState {
     SysBusDevice busdev;
@@ -103,6 +111,7 @@ struct IntelIOMMUState {
     uint64_t ecap;                  /* The value of extended capability reg */
 
     uint32_t context_cache_gen;     /* Should be in [1,MAX] */
+    GHashTable *iotlb;              /* IOTLB */
 
     MemoryRegionIOMMUOps iommu_ops;
     VTDAddressSpace **address_spaces[VTD_PCI_BUS_MAX];
-- 
GitLab