Diffstat (limited to 'kernel/drivers/iommu')
-rw-r--r--  kernel/drivers/iommu/amd_iommu.c        132
-rw-r--r--  kernel/drivers/iommu/amd_iommu_init.c    14
-rw-r--r--  kernel/drivers/iommu/amd_iommu_v2.c       4
-rw-r--r--  kernel/drivers/iommu/arm-smmu-v3.c        8
-rw-r--r--  kernel/drivers/iommu/dma-iommu.c          7
-rw-r--r--  kernel/drivers/iommu/dmar.c               7
-rw-r--r--  kernel/drivers/iommu/exynos-iommu.c       1
-rw-r--r--  kernel/drivers/iommu/intel-iommu.c       91
-rw-r--r--  kernel/drivers/iommu/intel-svm.c         28
-rw-r--r--  kernel/drivers/iommu/iommu.c              3
10 files changed, 240 insertions, 55 deletions
diff --git a/kernel/drivers/iommu/amd_iommu.c b/kernel/drivers/iommu/amd_iommu.c
index f417aafea..b08732bae 100644
--- a/kernel/drivers/iommu/amd_iommu.c
+++ b/kernel/drivers/iommu/amd_iommu.c
@@ -91,6 +91,7 @@ struct iommu_dev_data {
struct list_head dev_data_list; /* For global dev_data_list */
struct protection_domain *domain; /* Domain the device is bound to */
u16 devid; /* PCI Device ID */
+ u16 alias; /* Alias Device ID */
bool iommu_v2; /* Device can make use of IOMMUv2 */
bool passthrough; /* Device is identity mapped */
struct {
@@ -125,6 +126,13 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
return container_of(dom, struct protection_domain, domain);
}
+static inline u16 get_device_id(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ return PCI_DEVID(pdev->bus->number, pdev->devfn);
+}
+
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -162,6 +170,68 @@ out_unlock:
return dev_data;
}
+static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
+{
+ *(u16 *)data = alias;
+ return 0;
+}
+
+static u16 get_alias(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u16 devid, ivrs_alias, pci_alias;
+
+ devid = get_device_id(dev);
+ ivrs_alias = amd_iommu_alias_table[devid];
+ pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);
+
+ if (ivrs_alias == pci_alias)
+ return ivrs_alias;
+
+ /*
+ * DMA alias showdown
+ *
+ * The IVRS is fairly reliable in telling us about aliases, but it
+ * can't know about every screwy device. If we don't have an IVRS
+ * reported alias, use the PCI reported alias. In that case we may
+ * still need to initialize the rlookup and dev_table entries if the
+ * alias is to a non-existent device.
+ */
+ if (ivrs_alias == devid) {
+ if (!amd_iommu_rlookup_table[pci_alias]) {
+ amd_iommu_rlookup_table[pci_alias] =
+ amd_iommu_rlookup_table[devid];
+ memcpy(amd_iommu_dev_table[pci_alias].data,
+ amd_iommu_dev_table[devid].data,
+ sizeof(amd_iommu_dev_table[pci_alias].data));
+ }
+
+ return pci_alias;
+ }
+
+ pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
+ "for device %s[%04x:%04x], kernel reported alias "
+ "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
+ PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
+ PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
+ PCI_FUNC(pci_alias));
+
+ /*
+ * If we don't have a PCI DMA alias and the IVRS alias is on the same
+ * bus, then the IVRS table may know about a quirk that we don't.
+ */
+ if (pci_alias == devid &&
+ PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
+ pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
+ pdev->dma_alias_devfn = ivrs_alias & 0xff;
+ pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
+ PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
+ dev_name(dev));
+ }
+
+ return ivrs_alias;
+}
+
static struct iommu_dev_data *find_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -174,13 +244,6 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
return dev_data;
}
-static inline u16 get_device_id(struct device *dev)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
-
- return PCI_DEVID(pdev->bus->number, pdev->devfn);
-}
-
static struct iommu_dev_data *get_dev_data(struct device *dev)
{
return dev->archdata.iommu;
@@ -289,9 +352,11 @@ static void init_iommu_group(struct device *dev)
if (!domain)
goto out;
- dma_domain = to_pdomain(domain)->priv;
+ if (to_pdomain(domain)->flags == PD_DMA_OPS_MASK) {
+ dma_domain = to_pdomain(domain)->priv;
+ init_unity_mappings_for_device(dev, dma_domain);
+ }
- init_unity_mappings_for_device(dev, dma_domain);
out:
iommu_group_put(group);
}
@@ -308,6 +373,8 @@ static int iommu_init_device(struct device *dev)
if (!dev_data)
return -ENOMEM;
+ dev_data->alias = get_alias(dev);
+
if (pci_iommuv2_capable(pdev)) {
struct amd_iommu *iommu;
@@ -328,7 +395,7 @@ static void iommu_ignore_device(struct device *dev)
u16 devid, alias;
devid = get_device_id(dev);
- alias = amd_iommu_alias_table[devid];
+ alias = get_alias(dev);
memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));
@@ -859,7 +926,7 @@ again:
next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
left = (head - next_tail) % CMD_BUFFER_SIZE;
- if (left <= 2) {
+ if (left <= 0x20) {
struct iommu_cmd sync_cmd;
volatile u64 sem = 0;
int ret;
@@ -1017,7 +1084,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data)
int ret;
iommu = amd_iommu_rlookup_table[dev_data->devid];
- alias = amd_iommu_alias_table[dev_data->devid];
+ alias = dev_data->alias;
ret = iommu_flush_dte(iommu, dev_data->devid);
if (!ret && alias != dev_data->devid)
@@ -1766,6 +1833,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
kfree(dom->aperture[i]);
}
+ if (dom->domain.id)
+ domain_id_free(dom->domain.id);
+
kfree(dom);
}
@@ -1891,7 +1961,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
bool ats;
iommu = amd_iommu_rlookup_table[dev_data->devid];
- alias = amd_iommu_alias_table[dev_data->devid];
+ alias = dev_data->alias;
ats = dev_data->ats.enabled;
/* Update data structures */
@@ -1925,7 +1995,7 @@ static void do_detach(struct iommu_dev_data *dev_data)
return;
iommu = amd_iommu_rlookup_table[dev_data->devid];
- alias = amd_iommu_alias_table[dev_data->devid];
+ alias = dev_data->alias;
/* decrease reference counters */
dev_data->domain->dev_iommu[iommu->index] -= 1;
@@ -2257,8 +2327,15 @@ static void update_device_table(struct protection_domain *domain)
{
struct iommu_dev_data *dev_data;
- list_for_each_entry(dev_data, &domain->dev_list, list)
+ list_for_each_entry(dev_data, &domain->dev_list, list) {
set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled);
+
+ if (dev_data->devid == dev_data->alias)
+ continue;
+
+ /* There is an alias, update device table entry for it */
+ set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled);
+ }
}
static void update_domain(struct protection_domain *domain)
@@ -2905,9 +2982,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
static void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain;
-
- if (!dom)
- return;
+ struct dma_ops_domain *dma_dom;
domain = to_pdomain(dom);
@@ -2916,13 +2991,24 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
BUG_ON(domain->dev_cnt != 0);
- if (domain->mode != PAGE_MODE_NONE)
- free_pagetable(domain);
+ if (!dom)
+ return;
- if (domain->flags & PD_IOMMUV2_MASK)
- free_gcr3_table(domain);
+ switch (dom->type) {
+ case IOMMU_DOMAIN_DMA:
+ dma_dom = domain->priv;
+ dma_ops_domain_free(dma_dom);
+ break;
+ default:
+ if (domain->mode != PAGE_MODE_NONE)
+ free_pagetable(domain);
- protection_domain_free(domain);
+ if (domain->flags & PD_IOMMUV2_MASK)
+ free_gcr3_table(domain);
+
+ protection_domain_free(domain);
+ break;
+ }
}
static void amd_iommu_detach_device(struct iommu_domain *dom,
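The amd_iommu.c hunks above replace alias-table lookups with a per-device alias field and work entirely on 16-bit device IDs. A minimal standalone sketch (not part of the patch) of the bus/devfn packing behind get_device_id() and get_alias(); the macro bodies match their <linux/pci.h> definitions, and the 02:00.1 example device is made up:

#include <stdint.h>
#include <stdio.h>

/* Same packing as the <linux/pci.h> macros used by the driver. */
#define PCI_DEVID(bus, devfn)	((((uint16_t)(bus)) << 8) | (devfn))
#define PCI_BUS_NUM(x)		(((x) >> 8) & 0xff)
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)

int main(void)
{
	uint8_t bus = 0x02;
	uint8_t devfn = (0x00 << 3) | 0x01;	/* device 00, function 1 */
	uint16_t devid = PCI_DEVID(bus, devfn);

	/* PCI_SLOT/PCI_FUNC also work on the full devid, as in the driver:
	 * the bus bits end up above the 5-bit slot mask. */
	printf("devid 0x%04x -> %02x:%02x.%d\n", devid,
	       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid));
	return 0;
}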
diff --git a/kernel/drivers/iommu/amd_iommu_init.c b/kernel/drivers/iommu/amd_iommu_init.c
index bf4959f42..94f1bf772 100644
--- a/kernel/drivers/iommu/amd_iommu_init.c
+++ b/kernel/drivers/iommu/amd_iommu_init.c
@@ -1363,13 +1363,23 @@ static int __init amd_iommu_init_pci(void)
break;
}
+ /*
+ * Order is important here to make sure any unity map requirements are
+ * fulfilled. The unity mappings are created and written to the device
+ * table during the amd_iommu_init_api() call.
+ *
+ * After that we call init_device_table_dma() to make sure any
+ * uninitialized DTE will block DMA, and in the end we flush the caches
+ * of all IOMMUs to make sure the changes to the device table are
+ * active.
+ */
+ ret = amd_iommu_init_api();
+
init_device_table_dma();
for_each_iommu(iommu)
iommu_flush_all_caches(iommu);
- ret = amd_iommu_init_api();
-
if (!ret)
print_iommu_info();
diff --git a/kernel/drivers/iommu/amd_iommu_v2.c b/kernel/drivers/iommu/amd_iommu_v2.c
index 7caf2fa23..4831eb910 100644
--- a/kernel/drivers/iommu/amd_iommu_v2.c
+++ b/kernel/drivers/iommu/amd_iommu_v2.c
@@ -809,8 +809,10 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
goto out_free_domain;
group = iommu_group_get(&pdev->dev);
- if (!group)
+ if (!group) {
+ ret = -EINVAL;
goto out_free_domain;
+ }
ret = iommu_attach_group(dev_state->domain, group);
if (ret != 0)
diff --git a/kernel/drivers/iommu/arm-smmu-v3.c b/kernel/drivers/iommu/arm-smmu-v3.c
index 4e5118a4c..00df3832f 100644
--- a/kernel/drivers/iommu/arm-smmu-v3.c
+++ b/kernel/drivers/iommu/arm-smmu-v3.c
@@ -870,7 +870,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
* We may have concurrent producers, so we need to be careful
* not to touch any of the shadow cmdq state.
*/
- queue_read(cmd, Q_ENT(q, idx), q->ent_dwords);
+ queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
dev_err(smmu->dev, "skipping command in error state:\n");
for (i = 0; i < ARRAY_SIZE(cmd); ++i)
dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
@@ -881,7 +881,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
return;
}
- queue_write(cmd, Q_ENT(q, idx), q->ent_dwords);
+ queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -1025,6 +1025,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
case STRTAB_STE_0_CFG_S2_TRANS:
ste_live = true;
break;
+ case STRTAB_STE_0_CFG_ABORT:
+ if (disable_bypass)
+ break;
default:
BUG(); /* STE corruption */
}
@@ -1919,6 +1922,7 @@ static struct iommu_ops arm_smmu_ops = {
.detach_dev = arm_smmu_detach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
+ .map_sg = default_iommu_map_sg,
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
.remove_device = arm_smmu_remove_device,
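The arm-smmu-v3.c error-skip fix above reads and writes the command-queue entry at the consumer index rather than an unrelated variable. A generic sketch (assumed queue size and macro names, not the driver's exact Q_* helpers) of how such a power-of-two queue index carries a wrap bit so that full and empty can be told apart:

#include <stdint.h>
#include <stdio.h>

#define Q_SHIFT		4			/* assumed: 16-entry queue */
#define Q_IDX(p)	((p) & ((1u << Q_SHIFT) - 1))
#define Q_WRAP(p)	((p) & (1u << Q_SHIFT))

int main(void)
{
	uint32_t prod = 0, cons = 0;

	/* Producing one full queue's worth of entries leaves the index
	 * where it started but toggles the wrap bit. */
	for (int i = 0; i < (1 << Q_SHIFT); i++)
		prod = (prod + 1) & ((1u << (Q_SHIFT + 1)) - 1);

	printf("prod: idx=%u wrap=%d\n", (unsigned)Q_IDX(prod), !!Q_WRAP(prod));
	printf("full: %d  empty: %d\n",
	       Q_IDX(prod) == Q_IDX(cons) && Q_WRAP(prod) != Q_WRAP(cons),
	       prod == cons);
	return 0;
}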
diff --git a/kernel/drivers/iommu/dma-iommu.c b/kernel/drivers/iommu/dma-iommu.c
index 72d618266..347a3c17f 100644
--- a/kernel/drivers/iommu/dma-iommu.c
+++ b/kernel/drivers/iommu/dma-iommu.c
@@ -68,7 +68,8 @@ void iommu_put_dma_cookie(struct iommu_domain *domain)
if (!iovad)
return;
- put_iova_domain(iovad);
+ if (iovad->granule)
+ put_iova_domain(iovad);
kfree(iovad);
domain->iova_cookie = NULL;
}
@@ -403,7 +404,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
unsigned int s_length = sg_dma_len(s);
unsigned int s_dma_len = s->length;
- s->offset = s_offset;
+ s->offset += s_offset;
s->length = s_length;
sg_dma_address(s) = dma_addr + s_offset;
dma_addr += s_dma_len;
@@ -422,7 +423,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents)
for_each_sg(sg, s, nents, i) {
if (sg_dma_address(s) != DMA_ERROR_CODE)
- s->offset = sg_dma_address(s);
+ s->offset += sg_dma_address(s);
if (sg_dma_len(s))
s->length = sg_dma_len(s);
sg_dma_address(s) = DMA_ERROR_CODE;
diff --git a/kernel/drivers/iommu/dmar.c b/kernel/drivers/iommu/dmar.c
index 3821c4786..e913a930a 100644
--- a/kernel/drivers/iommu/dmar.c
+++ b/kernel/drivers/iommu/dmar.c
@@ -326,7 +326,9 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
struct pci_dev *pdev = to_pci_dev(data);
struct dmar_pci_notify_info *info;
- /* Only care about add/remove events for physical functions */
+ /* Only care about add/remove events for physical functions.
+ * For VFs we actually do the lookup based on the corresponding
+ * PF in device_to_iommu() anyway. */
if (pdev->is_virtfn)
return NOTIFY_DONE;
if (action != BUS_NOTIFY_ADD_DEVICE &&
@@ -1858,10 +1860,11 @@ static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
/*
* All PCI devices managed by this unit should have been destroyed.
*/
- if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt)
+ if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
for_each_active_dev_scope(dmaru->devices,
dmaru->devices_cnt, i, dev)
return -EBUSY;
+ }
ret = dmar_ir_hotplug(dmaru, false);
if (ret == 0)
diff --git a/kernel/drivers/iommu/exynos-iommu.c b/kernel/drivers/iommu/exynos-iommu.c
index 97c41b8ab..29a31eb9a 100644
--- a/kernel/drivers/iommu/exynos-iommu.c
+++ b/kernel/drivers/iommu/exynos-iommu.c
@@ -647,6 +647,7 @@ static struct platform_driver exynos_sysmmu_driver __refdata = {
.name = "exynos-sysmmu",
.of_match_table = sysmmu_of_match,
.pm = &sysmmu_pm_ops,
+ .suppress_bind_attrs = true,
}
};
diff --git a/kernel/drivers/iommu/intel-iommu.c b/kernel/drivers/iommu/intel-iommu.c
index a2e1b7f14..9413b0726 100644
--- a/kernel/drivers/iommu/intel-iommu.c
+++ b/kernel/drivers/iommu/intel-iommu.c
@@ -885,7 +885,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
return NULL;
if (dev_is_pci(dev)) {
+ struct pci_dev *pf_pdev;
+
pdev = to_pci_dev(dev);
+ /* VFs aren't listed in scope tables; we need to look up
+ * the PF instead to find the IOMMU. */
+ pf_pdev = pci_physfn(pdev);
+ dev = &pf_pdev->dev;
segment = pci_domain_nr(pdev->bus);
} else if (has_acpi_companion(dev))
dev = &ACPI_COMPANION(dev)->dev;
@@ -898,6 +904,13 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf
for_each_active_dev_scope(drhd->devices,
drhd->devices_cnt, i, tmp) {
if (tmp == dev) {
+ /* For a VF use its original BDF# not that of the PF
+ * which we used for the IOMMU lookup. Strictly speaking
+ * we could do this for all PCI devices; we only need to
+ * get the BDF# from the scope table for ACPI matches. */
+ if (pdev->is_virtfn)
+ goto got_pdev;
+
*bus = drhd->devices[i].bus;
*devfn = drhd->devices[i].devfn;
goto out;
@@ -1672,6 +1685,7 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
if (!iommu->domains || !iommu->domain_ids)
return;
+again:
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
struct dmar_domain *domain;
@@ -1684,10 +1698,19 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
domain = info->domain;
- dmar_remove_one_dev_info(domain, info->dev);
+ __dmar_remove_one_dev_info(info);
- if (!domain_type_is_vm_or_si(domain))
+ if (!domain_type_is_vm_or_si(domain)) {
+ /*
+ * The domain_exit() function can't be called under
+ * device_domain_lock, as it takes this lock itself.
+ * So release the lock here and re-run the loop
+ * afterwards.
+ */
+ spin_unlock_irqrestore(&device_domain_lock, flags);
domain_exit(domain);
+ goto again;
+ }
}
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -1970,6 +1993,25 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
if (context_present(context))
goto out_unlock;
+ /*
+ * For kdump cases, old valid entries may be cached due to the
+ * in-flight DMA and copied pgtable, but there is no unmapping
+ * behaviour for them, thus we need an explicit cache flush for
+ * the newly-mapped device. For kdump, at this point, the device
+ * is supposed to finish reset at its driver probe stage, so no
+ * in-flight DMA will exist, and we don't need to worry anymore
+ * hereafter.
+ */
+ if (context_copied(context)) {
+ u16 did_old = context_domain_id(context);
+
+ if (did_old >= 0 && did_old < cap_ndoms(iommu->cap))
+ iommu->flush.flush_context(iommu, did_old,
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ }
+
pgd = domain->pgd;
context_clear_entry(context);
@@ -2032,7 +2074,7 @@ out_unlock:
spin_unlock(&iommu->lock);
spin_unlock_irqrestore(&device_domain_lock, flags);
- return 0;
+ return ret;
}
struct domain_context_mapping_data {
@@ -3169,11 +3211,6 @@ static int __init init_dmars(void)
}
}
- iommu_flush_write_buffer(iommu);
- iommu_set_root_entry(iommu);
- iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
-
if (!ecap_pass_through(iommu->ecap))
hw_pass_through = 0;
#ifdef CONFIG_INTEL_IOMMU_SVM
@@ -3182,6 +3219,18 @@ static int __init init_dmars(void)
#endif
}
+ /*
+ * Now that qi is enabled on all iommus, set the root entry and flush
+ * caches. This is required on some Intel X58 chipsets, otherwise the
+ * flush_context function will loop forever and the boot hangs.
+ */
+ for_each_active_iommu(iommu, drhd) {
+ iommu_flush_write_buffer(iommu);
+ iommu_set_root_entry(iommu);
+ iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+ iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ }
+
if (iommu_pass_through)
iommu_identity_mapping |= IDENTMAP_ALL;
@@ -4175,10 +4224,11 @@ int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
if (!atsru)
return 0;
- if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
+ if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
i, dev)
return -EBUSY;
+ }
return 0;
}
@@ -4989,6 +5039,25 @@ static void intel_iommu_remove_device(struct device *dev)
}
#ifdef CONFIG_INTEL_IOMMU_SVM
+#define MAX_NR_PASID_BITS (20)
+static inline unsigned long intel_iommu_get_pts(struct intel_iommu *iommu)
+{
+ /*
+ * Convert ecap_pss to extend context entry pts encoding, also
+ * respect the soft pasid_max value set by the iommu.
+ * - number of PASID bits = ecap_pss + 1
+ * - number of PASID table entries = 2^(pts + 5)
+ * Therefore, pts = ecap_pss - 4
+ * e.g. KBL ecap_pss = 0x13, PASID has 20 bits, pts = 15
+ */
+ if (ecap_pss(iommu->ecap) < 5)
+ return 0;
+
+ /* pasid_max is encoded as actual number of entries not the bits */
+ return find_first_bit((unsigned long *)&iommu->pasid_max,
+ MAX_NR_PASID_BITS) - 5;
+}
+
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
{
struct device_domain_info *info;
@@ -5021,7 +5090,9 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
if (!(ctx_lo & CONTEXT_PASIDE)) {
context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
- context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
+ context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
+ intel_iommu_get_pts(iommu);
+
wmb();
/* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
* extended to permit requests-with-PASID if the PASIDE bit
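The intel_iommu_get_pts() helper added above converts the PASID-size capability field into the extended-context pts encoding. A worked example of the arithmetic in its comment, as a standalone sketch; the KBL value is the one quoted in that comment:

#include <stdio.h>

int main(void)
{
	unsigned int ecap_pss = 0x13;		/* KBL, from the comment above */
	unsigned int pasid_bits = ecap_pss + 1;	/* number of PASID bits */
	unsigned int pts = ecap_pss - 4;	/* table entries = 2^(pts + 5) */

	printf("PASID bits: %u\n", pasid_bits);				/* 20 */
	printf("pts = %u, entries = %u\n", pts, 1u << (pts + 5));	/* 15, 1048576 */
	return 0;
}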
diff --git a/kernel/drivers/iommu/intel-svm.c b/kernel/drivers/iommu/intel-svm.c
index d9939fa9b..f929879ec 100644
--- a/kernel/drivers/iommu/intel-svm.c
+++ b/kernel/drivers/iommu/intel-svm.c
@@ -39,10 +39,18 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
struct page *pages;
int order;
- order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT;
- if (order < 0)
- order = 0;
-
+ /* Start at 2 because it's defined as 2^(1+PSS) */
+ iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
+
+ /* Eventually I'm promised we will get a multi-level PASID table
+ * and it won't have to be physically contiguous. Until then,
+ * limit the size because 8MiB contiguous allocations can be hard
+ * to come by. The limit of 0x20000, which is 1MiB for each of
+ * the PASID and PASID-state tables, is somewhat arbitrary. */
+ if (iommu->pasid_max > 0x20000)
+ iommu->pasid_max = 0x20000;
+
+ order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!pages) {
pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
@@ -53,6 +61,8 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
if (ecap_dis(iommu->ecap)) {
+ /* Just making it explicit... */
+ BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
if (pages)
iommu->pasid_state_table = page_address(pages);
@@ -68,11 +78,7 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
{
- int order;
-
- order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT;
- if (order < 0)
- order = 0;
+ int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
if (iommu->pasid_table) {
free_pages((unsigned long)iommu->pasid_table, order);
@@ -371,8 +377,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
}
svm->iommu = iommu;
- if (pasid_max > 2 << ecap_pss(iommu->ecap))
- pasid_max = 2 << ecap_pss(iommu->ecap);
+ if (pasid_max > iommu->pasid_max)
+ pasid_max = iommu->pasid_max;
/* Do not use PASID 0 in caching mode (virtualised IOMMU) */
ret = idr_alloc(&iommu->pasid_idr, svm,
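The intel-svm.c sizing change above derives the allocation order from pasid_max instead of raw ecap_pss. A back-of-the-envelope check of the figures quoted in its comment (8 MiB uncapped, 1 MiB at the 0x20000-entry cap); the 8-byte entry size and the local get_order() are assumptions standing in for the kernel definitions:

#include <stdio.h>

#define PAGE_SHIFT	12

/* Stand-in for the kernel's get_order(): smallest page order covering 'size'. */
static int get_order(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	unsigned long entry_size = 8;		/* assumed sizeof(struct pasid_entry) */
	unsigned long pasid_max;

	pasid_max = 2UL << 0x13;		/* 2^(1+PSS) with ecap_pss = 0x13 */
	printf("uncapped: %lu KiB, order %d\n",
	       entry_size * pasid_max / 1024, get_order(entry_size * pasid_max));

	pasid_max = 0x20000;			/* cap applied by the patch */
	printf("capped:   %lu KiB, order %d\n",
	       entry_size * pasid_max / 1024, get_order(entry_size * pasid_max));
	return 0;
}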
diff --git a/kernel/drivers/iommu/iommu.c b/kernel/drivers/iommu/iommu.c
index 0e3b0092e..515bb8b80 100644
--- a/kernel/drivers/iommu/iommu.c
+++ b/kernel/drivers/iommu/iommu.c
@@ -848,7 +848,8 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
if (!group->default_domain) {
group->default_domain = __iommu_domain_alloc(dev->bus,
IOMMU_DOMAIN_DMA);
- group->domain = group->default_domain;
+ if (!group->domain)
+ group->domain = group->default_domain;
}
ret = iommu_group_add_device(group, dev);