summaryrefslogtreecommitdiffstats
path: root/kernel/drivers/gpu/drm/i915/i915_gem_gtt.c
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/drivers/gpu/drm/i915/i915_gem_gtt.c
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/drivers/gpu/drm/i915/i915_gem_gtt.c')
-rw-r--r--kernel/drivers/gpu/drm/i915/i915_gem_gtt.c2234
1 files changed, 1510 insertions, 724 deletions
diff --git a/kernel/drivers/gpu/drm/i915/i915_gem_gtt.c b/kernel/drivers/gpu/drm/i915/i915_gem_gtt.c
index ad90fa304..86c750045 100644
--- a/kernel/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/kernel/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -92,14 +92,14 @@
*
*/
+static int
+i915_get_ggtt_vma_pages(struct i915_vma *vma);
+
const struct i915_ggtt_view i915_ggtt_view_normal;
const struct i915_ggtt_view i915_ggtt_view_rotated = {
.type = I915_GGTT_VIEW_ROTATED
};
-static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
-static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);
-
static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
{
bool has_aliasing_ppgtt;
@@ -146,14 +146,33 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
return has_aliasing_ppgtt ? 1 : 0;
}
-static void ppgtt_bind_vma(struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags);
-static void ppgtt_unbind_vma(struct i915_vma *vma);
+static int ppgtt_bind_vma(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ u32 pte_flags = 0;
+
+ /* Currently applicable only to VLV */
+ if (vma->obj->gt_ro)
+ pte_flags |= PTE_READ_ONLY;
+
+ vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
+ cache_level, pte_flags);
+
+ return 0;
+}
+
+static void ppgtt_unbind_vma(struct i915_vma *vma)
+{
+ vma->vm->clear_range(vma->vm,
+ vma->node.start,
+ vma->obj->base.size,
+ true);
+}
-static inline gen8_pte_t gen8_pte_encode(dma_addr_t addr,
- enum i915_cache_level level,
- bool valid)
+static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ bool valid)
{
gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
pte |= addr;
@@ -173,9 +192,8 @@ static inline gen8_pte_t gen8_pte_encode(dma_addr_t addr,
return pte;
}
-static inline gen8_pde_t gen8_pde_encode(struct drm_device *dev,
- dma_addr_t addr,
- enum i915_cache_level level)
+static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
+ const enum i915_cache_level level)
{
gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
pde |= addr;
@@ -186,6 +204,9 @@ static inline gen8_pde_t gen8_pde_encode(struct drm_device *dev,
return pde;
}
+#define gen8_pdpe_encode gen8_pde_encode
+#define gen8_pml4e_encode gen8_pde_encode
+
static gen6_pte_t snb_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
bool valid, u32 unused)
@@ -282,59 +303,122 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
return pte;
}
-#define i915_dma_unmap_single(px, dev) \
- __i915_dma_unmap_single((px)->daddr, dev)
-
-static inline void __i915_dma_unmap_single(dma_addr_t daddr,
- struct drm_device *dev)
+static int __setup_page_dma(struct drm_device *dev,
+ struct i915_page_dma *p, gfp_t flags)
{
struct device *device = &dev->pdev->dev;
- dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
+ p->page = alloc_page(flags);
+ if (!p->page)
+ return -ENOMEM;
+
+ p->daddr = dma_map_page(device,
+ p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
+
+ if (dma_mapping_error(device, p->daddr)) {
+ __free_page(p->page);
+ return -EINVAL;
+ }
+
+ return 0;
}
-/**
- * i915_dma_map_single() - Create a dma mapping for a page table/dir/etc.
- * @px: Page table/dir/etc to get a DMA map for
- * @dev: drm device
- *
- * Page table allocations are unified across all gens. They always require a
- * single 4k allocation, as well as a DMA mapping. If we keep the structs
- * symmetric here, the simple macro covers us for every page table type.
- *
- * Return: 0 if success.
+static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
+{
+ return __setup_page_dma(dev, p, GFP_KERNEL);
+}
+
+static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
+{
+ if (WARN_ON(!p->page))
+ return;
+
+ dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
+ __free_page(p->page);
+ memset(p, 0, sizeof(*p));
+}
+
+static void *kmap_page_dma(struct i915_page_dma *p)
+{
+ return kmap_atomic(p->page);
+}
+
+/* We use the flushing unmap only with ppgtt structures:
+ * page directories, page tables and scratch pages.
*/
-#define i915_dma_map_single(px, dev) \
- i915_dma_map_page_single((px)->page, (dev), &(px)->daddr)
+static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
+{
+ /* There are only few exceptions for gen >=6. chv and bxt.
+ * And we are not sure about the latter so play safe for now.
+ */
+ if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
+ drm_clflush_virt_range(vaddr, PAGE_SIZE);
+
+ kunmap_atomic(vaddr);
+}
+
+#define kmap_px(px) kmap_page_dma(px_base(px))
+#define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
+
+#define setup_px(dev, px) setup_page_dma((dev), px_base(px))
+#define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
+#define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
+#define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
-static inline int i915_dma_map_page_single(struct page *page,
- struct drm_device *dev,
- dma_addr_t *daddr)
+static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
+ const uint64_t val)
{
- struct device *device = &dev->pdev->dev;
+ int i;
+ uint64_t * const vaddr = kmap_page_dma(p);
- *daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
- if (dma_mapping_error(device, *daddr))
- return -ENOMEM;
+ for (i = 0; i < 512; i++)
+ vaddr[i] = val;
- return 0;
+ kunmap_page_dma(dev, vaddr);
}
-static void unmap_and_free_pt(struct i915_page_table_entry *pt,
- struct drm_device *dev)
+static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
+ const uint32_t val32)
{
- if (WARN_ON(!pt->page))
- return;
+ uint64_t v = val32;
- i915_dma_unmap_single(pt, dev);
- __free_page(pt->page);
- kfree(pt->used_ptes);
- kfree(pt);
+ v = v << 32 | val32;
+
+ fill_page_dma(dev, p, v);
+}
+
+static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
+{
+ struct i915_page_scratch *sp;
+ int ret;
+
+ sp = kzalloc(sizeof(*sp), GFP_KERNEL);
+ if (sp == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
+ if (ret) {
+ kfree(sp);
+ return ERR_PTR(ret);
+ }
+
+ set_pages_uc(px_page(sp), 1);
+
+ return sp;
}
-static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev)
+static void free_scratch_page(struct drm_device *dev,
+ struct i915_page_scratch *sp)
{
- struct i915_page_table_entry *pt;
+ set_pages_wb(px_page(sp), 1);
+
+ cleanup_px(dev, sp);
+ kfree(sp);
+}
+
+static struct i915_page_table *alloc_pt(struct drm_device *dev)
+{
+ struct i915_page_table *pt;
const size_t count = INTEL_INFO(dev)->gen >= 8 ?
GEN8_PTES : GEN6_PTES;
int ret = -ENOMEM;
@@ -349,19 +433,13 @@ static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev)
if (!pt->used_ptes)
goto fail_bitmap;
- pt->page = alloc_page(GFP_KERNEL);
- if (!pt->page)
- goto fail_page;
-
- ret = i915_dma_map_single(pt, dev);
+ ret = setup_px(dev, pt);
if (ret)
- goto fail_dma;
+ goto fail_page_m;
return pt;
-fail_dma:
- __free_page(pt->page);
-fail_page:
+fail_page_m:
kfree(pt->used_ptes);
fail_bitmap:
kfree(pt);
@@ -369,109 +447,239 @@ fail_bitmap:
return ERR_PTR(ret);
}
-/**
- * alloc_pt_range() - Allocate a multiple page tables
- * @pd: The page directory which will have at least @count entries
- * available to point to the allocated page tables.
- * @pde: First page directory entry for which we are allocating.
- * @count: Number of pages to allocate.
- * @dev: DRM device.
- *
- * Allocates multiple page table pages and sets the appropriate entries in the
- * page table structure within the page directory. Function cleans up after
- * itself on any failures.
- *
- * Return: 0 if allocation succeeded.
- */
-static int alloc_pt_range(struct i915_page_directory_entry *pd, uint16_t pde, size_t count,
- struct drm_device *dev)
+static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
{
- int i, ret;
-
- /* 512 is the max page tables per page_directory on any platform. */
- if (WARN_ON(pde + count > I915_PDES))
- return -EINVAL;
-
- for (i = pde; i < pde + count; i++) {
- struct i915_page_table_entry *pt = alloc_pt_single(dev);
+ cleanup_px(dev, pt);
+ kfree(pt->used_ptes);
+ kfree(pt);
+}
- if (IS_ERR(pt)) {
- ret = PTR_ERR(pt);
- goto err_out;
- }
- WARN(pd->page_table[i],
- "Leaking page directory entry %d (%p)\n",
- i, pd->page_table[i]);
- pd->page_table[i] = pt;
- }
+static void gen8_initialize_pt(struct i915_address_space *vm,
+ struct i915_page_table *pt)
+{
+ gen8_pte_t scratch_pte;
- return 0;
+ scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+ I915_CACHE_LLC, true);
-err_out:
- while (i-- > pde)
- unmap_and_free_pt(pd->page_table[i], dev);
- return ret;
+ fill_px(vm->dev, pt, scratch_pte);
}
-static void unmap_and_free_pd(struct i915_page_directory_entry *pd)
+static void gen6_initialize_pt(struct i915_address_space *vm,
+ struct i915_page_table *pt)
{
- if (pd->page) {
- __free_page(pd->page);
- kfree(pd);
- }
+ gen6_pte_t scratch_pte;
+
+ WARN_ON(px_dma(vm->scratch_page) == 0);
+
+ scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
+ I915_CACHE_LLC, true, 0);
+
+ fill32_px(vm->dev, pt, scratch_pte);
}
-static struct i915_page_directory_entry *alloc_pd_single(void)
+static struct i915_page_directory *alloc_pd(struct drm_device *dev)
{
- struct i915_page_directory_entry *pd;
+ struct i915_page_directory *pd;
+ int ret = -ENOMEM;
pd = kzalloc(sizeof(*pd), GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
- pd->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!pd->page) {
+ pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
+ sizeof(*pd->used_pdes), GFP_KERNEL);
+ if (!pd->used_pdes)
+ goto fail_bitmap;
+
+ ret = setup_px(dev, pd);
+ if (ret)
+ goto fail_page_m;
+
+ return pd;
+
+fail_page_m:
+ kfree(pd->used_pdes);
+fail_bitmap:
+ kfree(pd);
+
+ return ERR_PTR(ret);
+}
+
+static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
+{
+ if (px_page(pd)) {
+ cleanup_px(dev, pd);
+ kfree(pd->used_pdes);
kfree(pd);
+ }
+}
+
+static void gen8_initialize_pd(struct i915_address_space *vm,
+ struct i915_page_directory *pd)
+{
+ gen8_pde_t scratch_pde;
+
+ scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
+
+ fill_px(vm->dev, pd, scratch_pde);
+}
+
+static int __pdp_init(struct drm_device *dev,
+ struct i915_page_directory_pointer *pdp)
+{
+ size_t pdpes = I915_PDPES_PER_PDP(dev);
+
+ pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
+ sizeof(unsigned long),
+ GFP_KERNEL);
+ if (!pdp->used_pdpes)
+ return -ENOMEM;
+
+ pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
+ GFP_KERNEL);
+ if (!pdp->page_directory) {
+ kfree(pdp->used_pdpes);
+ /* the PDP might be the statically allocated top level. Keep it
+ * as clean as possible */
+ pdp->used_pdpes = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void __pdp_fini(struct i915_page_directory_pointer *pdp)
+{
+ kfree(pdp->used_pdpes);
+ kfree(pdp->page_directory);
+ pdp->page_directory = NULL;
+}
+
+static struct
+i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
+{
+ struct i915_page_directory_pointer *pdp;
+ int ret = -ENOMEM;
+
+ WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
+
+ pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
+ if (!pdp)
return ERR_PTR(-ENOMEM);
+
+ ret = __pdp_init(dev, pdp);
+ if (ret)
+ goto fail_bitmap;
+
+ ret = setup_px(dev, pdp);
+ if (ret)
+ goto fail_page_m;
+
+ return pdp;
+
+fail_page_m:
+ __pdp_fini(pdp);
+fail_bitmap:
+ kfree(pdp);
+
+ return ERR_PTR(ret);
+}
+
+static void free_pdp(struct drm_device *dev,
+ struct i915_page_directory_pointer *pdp)
+{
+ __pdp_fini(pdp);
+ if (USES_FULL_48BIT_PPGTT(dev)) {
+ cleanup_px(dev, pdp);
+ kfree(pdp);
}
+}
- return pd;
+static void gen8_initialize_pdp(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp)
+{
+ gen8_ppgtt_pdpe_t scratch_pdpe;
+
+ scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
+
+ fill_px(vm->dev, pdp, scratch_pdpe);
+}
+
+static void gen8_initialize_pml4(struct i915_address_space *vm,
+ struct i915_pml4 *pml4)
+{
+ gen8_ppgtt_pml4e_t scratch_pml4e;
+
+ scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
+ I915_CACHE_LLC);
+
+ fill_px(vm->dev, pml4, scratch_pml4e);
+}
+
+static void
+gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
+ struct i915_page_directory_pointer *pdp,
+ struct i915_page_directory *pd,
+ int index)
+{
+ gen8_ppgtt_pdpe_t *page_directorypo;
+
+ if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+ return;
+
+ page_directorypo = kmap_px(pdp);
+ page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+ kunmap_px(ppgtt, page_directorypo);
+}
+
+static void
+gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
+ struct i915_pml4 *pml4,
+ struct i915_page_directory_pointer *pdp,
+ int index)
+{
+ gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
+
+ WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
+ pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
+ kunmap_px(ppgtt, pagemap);
}
/* Broadwell Page Directory Pointer Descriptors */
-static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry,
- uint64_t val)
+static int gen8_write_pdp(struct drm_i915_gem_request *req,
+ unsigned entry,
+ dma_addr_t addr)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
BUG_ON(entry >= 4);
- ret = intel_ring_begin(ring, 6);
+ ret = intel_ring_begin(req, 6);
if (ret)
return ret;
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
- intel_ring_emit(ring, (u32)(val >> 32));
+ intel_ring_emit(ring, upper_32_bits(addr));
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
- intel_ring_emit(ring, (u32)(val));
+ intel_ring_emit(ring, lower_32_bits(addr));
intel_ring_advance(ring);
return 0;
}
-static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *ring)
+static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
+ struct drm_i915_gem_request *req)
{
int i, ret;
- /* bit of a hack to find the actual last used pd */
- int used_pd = ppgtt->num_pd_entries / I915_PDES;
+ for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
+ const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- for (i = used_pd - 1; i >= 0; i--) {
- dma_addr_t addr = ppgtt->pdp.page_directory[i]->daddr;
- ret = gen8_write_pdp(ring, i, addr);
+ ret = gen8_write_pdp(req, i, pd_daddr);
if (ret)
return ret;
}
@@ -479,174 +687,292 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
return 0;
}
-static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
- uint64_t start,
- uint64_t length,
- bool use_scratch)
+static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
+ struct drm_i915_gem_request *req)
+{
+ return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
+}
+
+static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp,
+ uint64_t start,
+ uint64_t length,
+ gen8_pte_t scratch_pte)
{
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
- gen8_pte_t *pt_vaddr, scratch_pte;
- unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
- unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
- unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+ gen8_pte_t *pt_vaddr;
+ unsigned pdpe = gen8_pdpe_index(start);
+ unsigned pde = gen8_pde_index(start);
+ unsigned pte = gen8_pte_index(start);
unsigned num_entries = length >> PAGE_SHIFT;
unsigned last_pte, i;
- scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
- I915_CACHE_LLC, use_scratch);
+ if (WARN_ON(!pdp))
+ return;
while (num_entries) {
- struct i915_page_directory_entry *pd;
- struct i915_page_table_entry *pt;
- struct page *page_table;
+ struct i915_page_directory *pd;
+ struct i915_page_table *pt;
- if (WARN_ON(!ppgtt->pdp.page_directory[pdpe]))
+ if (WARN_ON(!pdp->page_directory[pdpe]))
break;
- pd = ppgtt->pdp.page_directory[pdpe];
+ pd = pdp->page_directory[pdpe];
if (WARN_ON(!pd->page_table[pde]))
break;
pt = pd->page_table[pde];
- if (WARN_ON(!pt->page))
+ if (WARN_ON(!px_page(pt)))
break;
- page_table = pt->page;
-
last_pte = pte + num_entries;
if (last_pte > GEN8_PTES)
last_pte = GEN8_PTES;
- pt_vaddr = kmap_atomic(page_table);
+ pt_vaddr = kmap_px(pt);
for (i = pte; i < last_pte; i++) {
pt_vaddr[i] = scratch_pte;
num_entries--;
}
- if (!HAS_LLC(ppgtt->base.dev))
- drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
- kunmap_atomic(pt_vaddr);
+ kunmap_px(ppgtt, pt);
pte = 0;
if (++pde == I915_PDES) {
- pdpe++;
+ if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
+ break;
pde = 0;
}
}
}
-static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
- struct sg_table *pages,
- uint64_t start,
- enum i915_cache_level cache_level, u32 unused)
+static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
+ uint64_t start,
+ uint64_t length,
+ bool use_scratch)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+ gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+ I915_CACHE_LLC, use_scratch);
+
+ if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
+ gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
+ scratch_pte);
+ } else {
+ uint64_t templ4, pml4e;
+ struct i915_page_directory_pointer *pdp;
+
+ gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
+ gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
+ scratch_pte);
+ }
+ }
+}
+
+static void
+gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp,
+ struct sg_page_iter *sg_iter,
+ uint64_t start,
+ enum i915_cache_level cache_level)
{
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen8_pte_t *pt_vaddr;
- unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
- unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
- unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
- struct sg_page_iter sg_iter;
+ unsigned pdpe = gen8_pdpe_index(start);
+ unsigned pde = gen8_pde_index(start);
+ unsigned pte = gen8_pte_index(start);
pt_vaddr = NULL;
- for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
- if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
- break;
-
+ while (__sg_page_iter_next(sg_iter)) {
if (pt_vaddr == NULL) {
- struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[pdpe];
- struct i915_page_table_entry *pt = pd->page_table[pde];
- struct page *page_table = pt->page;
-
- pt_vaddr = kmap_atomic(page_table);
+ struct i915_page_directory *pd = pdp->page_directory[pdpe];
+ struct i915_page_table *pt = pd->page_table[pde];
+ pt_vaddr = kmap_px(pt);
}
pt_vaddr[pte] =
- gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
+ gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
cache_level, true);
if (++pte == GEN8_PTES) {
- if (!HAS_LLC(ppgtt->base.dev))
- drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
- kunmap_atomic(pt_vaddr);
+ kunmap_px(ppgtt, pt_vaddr);
pt_vaddr = NULL;
if (++pde == I915_PDES) {
- pdpe++;
+ if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
+ break;
pde = 0;
}
pte = 0;
}
}
- if (pt_vaddr) {
- if (!HAS_LLC(ppgtt->base.dev))
- drm_clflush_virt_range(pt_vaddr, PAGE_SIZE);
- kunmap_atomic(pt_vaddr);
+
+ if (pt_vaddr)
+ kunmap_px(ppgtt, pt_vaddr);
+}
+
+static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
+ struct sg_table *pages,
+ uint64_t start,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+ struct sg_page_iter sg_iter;
+
+ __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
+
+ if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
+ gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
+ cache_level);
+ } else {
+ struct i915_page_directory_pointer *pdp;
+ uint64_t templ4, pml4e;
+ uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
+
+ gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
+ gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
+ start, cache_level);
+ }
}
}
-static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct drm_device *dev)
+static void gen8_free_page_tables(struct drm_device *dev,
+ struct i915_page_directory *pd)
{
int i;
- if (!pd->page)
+ if (!px_page(pd))
return;
- for (i = 0; i < I915_PDES; i++) {
+ for_each_set_bit(i, pd->used_pdes, I915_PDES) {
if (WARN_ON(!pd->page_table[i]))
continue;
- unmap_and_free_pt(pd->page_table[i], dev);
+ free_pt(dev, pd->page_table[i]);
pd->page_table[i] = NULL;
}
}
-static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
+static int gen8_init_scratch(struct i915_address_space *vm)
+{
+ struct drm_device *dev = vm->dev;
+
+ vm->scratch_page = alloc_scratch_page(dev);
+ if (IS_ERR(vm->scratch_page))
+ return PTR_ERR(vm->scratch_page);
+
+ vm->scratch_pt = alloc_pt(dev);
+ if (IS_ERR(vm->scratch_pt)) {
+ free_scratch_page(dev, vm->scratch_page);
+ return PTR_ERR(vm->scratch_pt);
+ }
+
+ vm->scratch_pd = alloc_pd(dev);
+ if (IS_ERR(vm->scratch_pd)) {
+ free_pt(dev, vm->scratch_pt);
+ free_scratch_page(dev, vm->scratch_page);
+ return PTR_ERR(vm->scratch_pd);
+ }
+
+ if (USES_FULL_48BIT_PPGTT(dev)) {
+ vm->scratch_pdp = alloc_pdp(dev);
+ if (IS_ERR(vm->scratch_pdp)) {
+ free_pd(dev, vm->scratch_pd);
+ free_pt(dev, vm->scratch_pt);
+ free_scratch_page(dev, vm->scratch_page);
+ return PTR_ERR(vm->scratch_pdp);
+ }
+ }
+
+ gen8_initialize_pt(vm, vm->scratch_pt);
+ gen8_initialize_pd(vm, vm->scratch_pd);
+ if (USES_FULL_48BIT_PPGTT(dev))
+ gen8_initialize_pdp(vm, vm->scratch_pdp);
+
+ return 0;
+}
+
+static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
{
+ enum vgt_g2v_type msg;
+ struct drm_device *dev = ppgtt->base.dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ unsigned int offset = vgtif_reg(pdp0_lo);
int i;
- for (i = 0; i < ppgtt->num_pd_pages; i++) {
- if (WARN_ON(!ppgtt->pdp.page_directory[i]))
- continue;
+ if (USES_FULL_48BIT_PPGTT(dev)) {
+ u64 daddr = px_dma(&ppgtt->pml4);
+
+ I915_WRITE(offset, lower_32_bits(daddr));
+ I915_WRITE(offset + 4, upper_32_bits(daddr));
+
+ msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
+ VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
+ } else {
+ for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
+ u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+ I915_WRITE(offset, lower_32_bits(daddr));
+ I915_WRITE(offset + 4, upper_32_bits(daddr));
- gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
- unmap_and_free_pd(ppgtt->pdp.page_directory[i]);
+ offset += 8;
+ }
+
+ msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
+ VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
}
+
+ I915_WRITE(vgtif_reg(g2v_notify), msg);
+
+ return 0;
+}
+
+static void gen8_free_scratch(struct i915_address_space *vm)
+{
+ struct drm_device *dev = vm->dev;
+
+ if (USES_FULL_48BIT_PPGTT(dev))
+ free_pdp(dev, vm->scratch_pdp);
+ free_pd(dev, vm->scratch_pd);
+ free_pt(dev, vm->scratch_pt);
+ free_scratch_page(dev, vm->scratch_page);
}
-static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
+static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
+ struct i915_page_directory_pointer *pdp)
{
- struct pci_dev *hwdev = ppgtt->base.dev->pdev;
- int i, j;
+ int i;
- for (i = 0; i < ppgtt->num_pd_pages; i++) {
- /* TODO: In the future we'll support sparse mappings, so this
- * will have to change. */
- if (!ppgtt->pdp.page_directory[i]->daddr)
+ for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
+ if (WARN_ON(!pdp->page_directory[i]))
continue;
- pci_unmap_page(hwdev, ppgtt->pdp.page_directory[i]->daddr, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL);
+ gen8_free_page_tables(dev, pdp->page_directory[i]);
+ free_pd(dev, pdp->page_directory[i]);
+ }
- for (j = 0; j < I915_PDES; j++) {
- struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i];
- struct i915_page_table_entry *pt;
- dma_addr_t addr;
+ free_pdp(dev, pdp);
+}
- if (WARN_ON(!pd->page_table[j]))
- continue;
+static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
+{
+ int i;
- pt = pd->page_table[j];
- addr = pt->daddr;
+ for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
+ if (WARN_ON(!ppgtt->pml4.pdps[i]))
+ continue;
- if (addr)
- pci_unmap_page(hwdev, addr, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL);
- }
+ gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
}
+
+ cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
}
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -654,113 +980,517 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
- gen8_ppgtt_unmap_pages(ppgtt);
- gen8_ppgtt_free(ppgtt);
+ if (intel_vgpu_active(vm->dev))
+ gen8_ppgtt_notify_vgt(ppgtt, false);
+
+ if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+ gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
+ else
+ gen8_ppgtt_cleanup_4lvl(ppgtt);
+
+ gen8_free_scratch(vm);
}
-static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt)
+/**
+ * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
+ * @vm: Master vm structure.
+ * @pd: Page directory for this address range.
+ * @start: Starting virtual address to begin allocations.
+ * @length: Size of the allocations.
+ * @new_pts: Bitmap set by function with new allocations. Likely used by the
+ * caller to free on error.
+ *
+ * Allocate the required number of page tables. Extremely similar to
+ * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
+ * the page directory boundary (instead of the page directory pointer). That
+ * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
+ * possible, and likely that the caller will need to use multiple calls of this
+ * function to achieve the appropriate allocation.
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
+ struct i915_page_directory *pd,
+ uint64_t start,
+ uint64_t length,
+ unsigned long *new_pts)
{
- int i, ret;
+ struct drm_device *dev = vm->dev;
+ struct i915_page_table *pt;
+ uint64_t temp;
+ uint32_t pde;
+
+ gen8_for_each_pde(pt, pd, start, length, temp, pde) {
+ /* Don't reallocate page tables */
+ if (test_bit(pde, pd->used_pdes)) {
+ /* Scratch is never allocated this way */
+ WARN_ON(pt == vm->scratch_pt);
+ continue;
+ }
- for (i = 0; i < ppgtt->num_pd_pages; i++) {
- ret = alloc_pt_range(ppgtt->pdp.page_directory[i],
- 0, I915_PDES, ppgtt->base.dev);
- if (ret)
+ pt = alloc_pt(dev);
+ if (IS_ERR(pt))
goto unwind_out;
+
+ gen8_initialize_pt(vm, pt);
+ pd->page_table[pde] = pt;
+ __set_bit(pde, new_pts);
+ trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
}
return 0;
unwind_out:
- while (i--)
- gen8_free_page_tables(ppgtt->pdp.page_directory[i], ppgtt->base.dev);
+ for_each_set_bit(pde, new_pts, I915_PDES)
+ free_pt(dev, pd->page_table[pde]);
return -ENOMEM;
}
-static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
- const int max_pdp)
+/**
+ * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
+ * @vm: Master vm structure.
+ * @pdp: Page directory pointer for this address range.
+ * @start: Starting virtual address to begin allocations.
+ * @length: Size of the allocations.
+ * @new_pds: Bitmap set by function with new allocations. Likely used by the
+ * caller to free on error.
+ *
+ * Allocate the required number of page directories starting at the pde index of
+ * @start, and ending at the pde index @start + @length. This function will skip
+ * over already allocated page directories within the range, and only allocate
+ * new ones, setting the appropriate pointer within the pdp as well as the
+ * correct position in the bitmap @new_pds.
+ *
+ * The function will only allocate the pages within the range for a give page
+ * directory pointer. In other words, if @start + @length straddles a virtually
+ * addressed PDP boundary (512GB for 4k pages), there will be more allocations
+ * required by the caller, This is not currently possible, and the BUG in the
+ * code will prevent it.
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int
+gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp,
+ uint64_t start,
+ uint64_t length,
+ unsigned long *new_pds)
{
- int i;
+ struct drm_device *dev = vm->dev;
+ struct i915_page_directory *pd;
+ uint64_t temp;
+ uint32_t pdpe;
+ uint32_t pdpes = I915_PDPES_PER_PDP(dev);
+
+ WARN_ON(!bitmap_empty(new_pds, pdpes));
+
+ gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+ if (test_bit(pdpe, pdp->used_pdpes))
+ continue;
- for (i = 0; i < max_pdp; i++) {
- ppgtt->pdp.page_directory[i] = alloc_pd_single();
- if (IS_ERR(ppgtt->pdp.page_directory[i]))
+ pd = alloc_pd(dev);
+ if (IS_ERR(pd))
goto unwind_out;
+
+ gen8_initialize_pd(vm, pd);
+ pdp->page_directory[pdpe] = pd;
+ __set_bit(pdpe, new_pds);
+ trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
}
- ppgtt->num_pd_pages = max_pdp;
- BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPES);
+ return 0;
+
+unwind_out:
+ for_each_set_bit(pdpe, new_pds, pdpes)
+ free_pd(dev, pdp->page_directory[pdpe]);
+
+ return -ENOMEM;
+}
+
+/**
+ * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
+ * @vm: Master vm structure.
+ * @pml4: Page map level 4 for this address range.
+ * @start: Starting virtual address to begin allocations.
+ * @length: Size of the allocations.
+ * @new_pdps: Bitmap set by function with new allocations. Likely used by the
+ * caller to free on error.
+ *
+ * Allocate the required number of page directory pointers. Extremely similar to
+ * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
+ * The main difference is here we are limited by the pml4 boundary (instead of
+ * the page directory pointer).
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int
+gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
+ struct i915_pml4 *pml4,
+ uint64_t start,
+ uint64_t length,
+ unsigned long *new_pdps)
+{
+ struct drm_device *dev = vm->dev;
+ struct i915_page_directory_pointer *pdp;
+ uint64_t temp;
+ uint32_t pml4e;
+
+ WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
+
+ gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+ if (!test_bit(pml4e, pml4->used_pml4es)) {
+ pdp = alloc_pdp(dev);
+ if (IS_ERR(pdp))
+ goto unwind_out;
+
+ gen8_initialize_pdp(vm, pdp);
+ pml4->pdps[pml4e] = pdp;
+ __set_bit(pml4e, new_pdps);
+ trace_i915_page_directory_pointer_entry_alloc(vm,
+ pml4e,
+ start,
+ GEN8_PML4E_SHIFT);
+ }
+ }
return 0;
unwind_out:
- while (i--)
- unmap_and_free_pd(ppgtt->pdp.page_directory[i]);
+ for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+ free_pdp(dev, pml4->pdps[pml4e]);
return -ENOMEM;
}
-static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
- const int max_pdp)
+static void
+free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
+{
+ kfree(new_pts);
+ kfree(new_pds);
+}
+
+/* Fills in the page directory bitmap, and the array of page tables bitmap. Both
+ * of these are based on the number of PDPEs in the system.
+ */
+static
+int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
+ unsigned long **new_pts,
+ uint32_t pdpes)
+{
+ unsigned long *pds;
+ unsigned long *pts;
+
+ pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
+ if (!pds)
+ return -ENOMEM;
+
+ pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
+ GFP_TEMPORARY);
+ if (!pts)
+ goto err_out;
+
+ *new_pds = pds;
+ *new_pts = pts;
+
+ return 0;
+
+err_out:
+ free_gen8_temp_bitmaps(pds, pts);
+ return -ENOMEM;
+}
+
+/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
+ * the page table structures, we mark them dirty so that
+ * context switching/execlist queuing code takes extra steps
+ * to ensure that tlbs are flushed.
+ */
+static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
{
+ ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
+}
+
+static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp,
+ uint64_t start,
+ uint64_t length)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+ unsigned long *new_page_dirs, *new_page_tables;
+ struct drm_device *dev = vm->dev;
+ struct i915_page_directory *pd;
+ const uint64_t orig_start = start;
+ const uint64_t orig_length = length;
+ uint64_t temp;
+ uint32_t pdpe;
+ uint32_t pdpes = I915_PDPES_PER_PDP(dev);
int ret;
- ret = gen8_ppgtt_allocate_page_directories(ppgtt, max_pdp);
+ /* Wrap is never okay since we can only represent 48b, and we don't
+ * actually use the other side of the canonical address space.
+ */
+ if (WARN_ON(start + length < start))
+ return -ENODEV;
+
+ if (WARN_ON(start + length > vm->total))
+ return -ENODEV;
+
+ ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
if (ret)
return ret;
- ret = gen8_ppgtt_allocate_page_tables(ppgtt);
- if (ret)
- goto err_out;
+ /* Do the allocations first so we can easily bail out */
+ ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
+ new_page_dirs);
+ if (ret) {
+ free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+ return ret;
+ }
- ppgtt->num_pd_entries = max_pdp * I915_PDES;
+ /* For every page directory referenced, allocate page tables */
+ gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+ ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
+ new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
+ if (ret)
+ goto err_out;
+ }
+ start = orig_start;
+ length = orig_length;
+
+ /* Allocations have completed successfully, so set the bitmaps, and do
+ * the mappings. */
+ gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+ gen8_pde_t *const page_directory = kmap_px(pd);
+ struct i915_page_table *pt;
+ uint64_t pd_len = length;
+ uint64_t pd_start = start;
+ uint32_t pde;
+
+ /* Every pd should be allocated, we just did that above. */
+ WARN_ON(!pd);
+
+ gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+ /* Same reasoning as pd */
+ WARN_ON(!pt);
+ WARN_ON(!pd_len);
+ WARN_ON(!gen8_pte_count(pd_start, pd_len));
+
+ /* Set our used ptes within the page table */
+ bitmap_set(pt->used_ptes,
+ gen8_pte_index(pd_start),
+ gen8_pte_count(pd_start, pd_len));
+
+ /* Our pde is now pointing to the pagetable, pt */
+ __set_bit(pde, pd->used_pdes);
+
+ /* Map the PDE to the page table */
+ page_directory[pde] = gen8_pde_encode(px_dma(pt),
+ I915_CACHE_LLC);
+ trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
+ gen8_pte_index(start),
+ gen8_pte_count(start, length),
+ GEN8_PTES);
+
+ /* NB: We haven't yet mapped ptes to pages. At this
+ * point we're still relying on insert_entries() */
+ }
+
+ kunmap_px(ppgtt, page_directory);
+ __set_bit(pdpe, pdp->used_pdpes);
+ gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
+ }
+
+ free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+ mark_tlbs_dirty(ppgtt);
return 0;
err_out:
- gen8_ppgtt_free(ppgtt);
+ while (pdpe--) {
+ for_each_set_bit(temp, new_page_tables + pdpe *
+ BITS_TO_LONGS(I915_PDES), I915_PDES)
+ free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
+ }
+
+ for_each_set_bit(pdpe, new_page_dirs, pdpes)
+ free_pd(dev, pdp->page_directory[pdpe]);
+
+ free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+ mark_tlbs_dirty(ppgtt);
return ret;
}
-static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
- const int pd)
+static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
+ struct i915_pml4 *pml4,
+ uint64_t start,
+ uint64_t length)
{
- dma_addr_t pd_addr;
- int ret;
+ DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+ struct i915_page_directory_pointer *pdp;
+ uint64_t temp, pml4e;
+ int ret = 0;
- pd_addr = pci_map_page(ppgtt->base.dev->pdev,
- ppgtt->pdp.page_directory[pd]->page, 0,
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ /* Do the pml4 allocations first, so we don't need to track the newly
+ * allocated tables below the pdp */
+ bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
- ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr);
+ /* The pagedirectory and pagetable allocations are done in the shared 3
+ * and 4 level code. Just allocate the pdps.
+ */
+ ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
+ new_pdps);
if (ret)
return ret;
- ppgtt->pdp.page_directory[pd]->daddr = pd_addr;
+ WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
+ "The allocation has spanned more than 512GB. "
+ "It is highly likely this is incorrect.");
+
+ gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+ WARN_ON(!pdp);
+
+ ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
+ if (ret)
+ goto err_out;
+
+ gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
+ }
+
+ bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
+ GEN8_PML4ES_PER_PML4);
return 0;
+
+err_out:
+ for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+ gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
+
+ return ret;
+}
+
+static int gen8_alloc_va_range(struct i915_address_space *vm,
+ uint64_t start, uint64_t length)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+
+ if (USES_FULL_48BIT_PPGTT(vm->dev))
+ return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
+ else
+ return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
+}
+
+static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
+ uint64_t start, uint64_t length,
+ gen8_pte_t scratch_pte,
+ struct seq_file *m)
+{
+ struct i915_page_directory *pd;
+ uint64_t temp;
+ uint32_t pdpe;
+
+ gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
+ struct i915_page_table *pt;
+ uint64_t pd_len = length;
+ uint64_t pd_start = start;
+ uint32_t pde;
+
+ if (!test_bit(pdpe, pdp->used_pdpes))
+ continue;
+
+ seq_printf(m, "\tPDPE #%d\n", pdpe);
+ gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+ uint32_t pte;
+ gen8_pte_t *pt_vaddr;
+
+ if (!test_bit(pde, pd->used_pdes))
+ continue;
+
+ pt_vaddr = kmap_px(pt);
+ for (pte = 0; pte < GEN8_PTES; pte += 4) {
+ uint64_t va =
+ (pdpe << GEN8_PDPE_SHIFT) |
+ (pde << GEN8_PDE_SHIFT) |
+ (pte << GEN8_PTE_SHIFT);
+ int i;
+ bool found = false;
+
+ for (i = 0; i < 4; i++)
+ if (pt_vaddr[pte + i] != scratch_pte)
+ found = true;
+ if (!found)
+ continue;
+
+ seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
+ for (i = 0; i < 4; i++) {
+ if (pt_vaddr[pte + i] != scratch_pte)
+ seq_printf(m, " %llx", pt_vaddr[pte + i]);
+ else
+ seq_puts(m, " SCRATCH ");
+ }
+ seq_puts(m, "\n");
+ }
+ /* don't use kunmap_px, it could trigger
+ * an unnecessary flush.
+ */
+ kunmap_atomic(pt_vaddr);
+ }
+ }
}
-static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
- const int pd,
- const int pt)
+static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
- dma_addr_t pt_addr;
- struct i915_page_directory_entry *pdir = ppgtt->pdp.page_directory[pd];
- struct i915_page_table_entry *ptab = pdir->page_table[pt];
- struct page *p = ptab->page;
+ struct i915_address_space *vm = &ppgtt->base;
+ uint64_t start = ppgtt->base.start;
+ uint64_t length = ppgtt->base.total;
+ gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
+ I915_CACHE_LLC, true);
+
+ if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
+ gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
+ } else {
+ uint64_t templ4, pml4e;
+ struct i915_pml4 *pml4 = &ppgtt->pml4;
+ struct i915_page_directory_pointer *pdp;
+
+ gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) {
+ if (!test_bit(pml4e, pml4->used_pml4es))
+ continue;
+
+ seq_printf(m, " PML4E #%llu\n", pml4e);
+ gen8_dump_pdp(pdp, start, length, scratch_pte, m);
+ }
+ }
+}
+
+static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
+{
+ unsigned long *new_page_dirs, *new_page_tables;
+ uint32_t pdpes = I915_PDPES_PER_PDP(dev);
int ret;
- pt_addr = pci_map_page(ppgtt->base.dev->pdev,
- p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
+ /* We allocate temp bitmap for page tables for no gain
+ * but as this is for init only, lets keep the things simple
+ */
+ ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
if (ret)
return ret;
- ptab->daddr = pt_addr;
+ /* Allocate for all pdps regardless of how the ppgtt
+ * was defined.
+ */
+ ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
+ 0, 1ULL << 32,
+ new_page_dirs);
+ if (!ret)
+ *ppgtt->pdp.used_pdpes = *new_page_dirs;
- return 0;
+ free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+
+ return ret;
}
/*
@@ -769,115 +1499,78 @@ static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
* PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address
* space.
*
- * FIXME: split allocation into smaller pieces. For now we only ever do this
- * once, but with full PPGTT, the multiple contiguous allocations will be bad.
- * TODO: Do something with the size parameter
*/
-static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
+static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
- const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
- const int min_pt_pages = I915_PDES * max_pdp;
- int i, j, ret;
-
- if (size % (1<<30))
- DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
+ int ret;
- /* 1. Do all our allocations for page directories and page tables.
- * We allocate more than was asked so that we can point the unused parts
- * to valid entries that point to scratch page. Dynamic page tables
- * will fix this eventually.
- */
- ret = gen8_ppgtt_alloc(ppgtt, GEN8_LEGACY_PDPES);
+ ret = gen8_init_scratch(&ppgtt->base);
if (ret)
return ret;
- /*
- * 2. Create DMA mappings for the page directories and page tables.
- */
- for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
- ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
+ ppgtt->base.start = 0;
+ ppgtt->base.cleanup = gen8_ppgtt_cleanup;
+ ppgtt->base.allocate_va_range = gen8_alloc_va_range;
+ ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
+ ppgtt->base.clear_range = gen8_ppgtt_clear_range;
+ ppgtt->base.unbind_vma = ppgtt_unbind_vma;
+ ppgtt->base.bind_vma = ppgtt_bind_vma;
+ ppgtt->debug_dump = gen8_dump_ppgtt;
+
+ if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+ ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
if (ret)
- goto bail;
+ goto free_scratch;
- for (j = 0; j < I915_PDES; j++) {
- ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
- if (ret)
- goto bail;
- }
- }
+ gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
- /*
- * 3. Map all the page directory entires to point to the page tables
- * we've allocated.
- *
- * For now, the PPGTT helper functions all require that the PDEs are
- * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
- * will never need to touch the PDEs again.
- */
- for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
- struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i];
- gen8_pde_t *pd_vaddr;
- pd_vaddr = kmap_atomic(ppgtt->pdp.page_directory[i]->page);
- for (j = 0; j < I915_PDES; j++) {
- struct i915_page_table_entry *pt = pd->page_table[j];
- dma_addr_t addr = pt->daddr;
- pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
- I915_CACHE_LLC);
- }
- if (!HAS_LLC(ppgtt->base.dev))
- drm_clflush_virt_range(pd_vaddr, PAGE_SIZE);
- kunmap_atomic(pd_vaddr);
- }
+ ppgtt->base.total = 1ULL << 48;
+ ppgtt->switch_mm = gen8_48b_mm_switch;
+ } else {
+ ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
+ if (ret)
+ goto free_scratch;
- ppgtt->switch_mm = gen8_mm_switch;
- ppgtt->base.clear_range = gen8_ppgtt_clear_range;
- ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
- ppgtt->base.cleanup = gen8_ppgtt_cleanup;
- ppgtt->base.start = 0;
+ ppgtt->base.total = 1ULL << 32;
+ ppgtt->switch_mm = gen8_legacy_mm_switch;
+ trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
+ 0, 0,
+ GEN8_PML4E_SHIFT);
- /* This is the area that we advertise as usable for the caller */
- ppgtt->base.total = max_pdp * I915_PDES * GEN8_PTES * PAGE_SIZE;
+ if (intel_vgpu_active(ppgtt->base.dev)) {
+ ret = gen8_preallocate_top_level_pdps(ppgtt);
+ if (ret)
+ goto free_scratch;
+ }
+ }
- /* Set all ptes to a valid scratch page. Also above requested space */
- ppgtt->base.clear_range(&ppgtt->base, 0,
- ppgtt->num_pd_pages * GEN8_PTES * PAGE_SIZE,
- true);
+ if (intel_vgpu_active(ppgtt->base.dev))
+ gen8_ppgtt_notify_vgt(ppgtt, true);
- DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
- ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
- DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
- ppgtt->num_pd_entries,
- (ppgtt->num_pd_entries - min_pt_pages) + size % (1<<30));
return 0;
-bail:
- gen8_ppgtt_unmap_pages(ppgtt);
- gen8_ppgtt_free(ppgtt);
+free_scratch:
+ gen8_free_scratch(&ppgtt->base);
return ret;
}
static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
- struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
struct i915_address_space *vm = &ppgtt->base;
- gen6_pte_t __iomem *pd_addr;
+ struct i915_page_table *unused;
gen6_pte_t scratch_pte;
uint32_t pd_entry;
- int pte, pde;
+ uint32_t pte, pde, temp;
+ uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
- scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
+ scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
+ I915_CACHE_LLC, true, 0);
- pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
- ppgtt->pd.pd_offset / sizeof(gen6_pte_t);
-
- seq_printf(m, " VM %p (pd_offset %x-%x):\n", vm,
- ppgtt->pd.pd_offset,
- ppgtt->pd.pd_offset + ppgtt->num_pd_entries);
- for (pde = 0; pde < ppgtt->num_pd_entries; pde++) {
+ gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
u32 expected;
gen6_pte_t *pt_vaddr;
- dma_addr_t pt_addr = ppgtt->pd.page_table[pde]->daddr;
- pd_entry = readl(pd_addr + pde);
+ const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
+ pd_entry = readl(ppgtt->pd_addr + pde);
expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
if (pd_entry != expected)
@@ -887,7 +1580,8 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
expected);
seq_printf(m, "\tPDE: %x\n", pd_entry);
- pt_vaddr = kmap_atomic(ppgtt->pd.page_table[pde]->page);
+ pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
+
for (pte = 0; pte < GEN6_PTES; pte+=4) {
unsigned long va =
(pde * PAGE_SIZE * GEN6_PTES) +
@@ -909,20 +1603,20 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
}
seq_puts(m, "\n");
}
- kunmap_atomic(pt_vaddr);
+ kunmap_px(ppgtt, pt_vaddr);
}
}
/* Write pde (index) from the page directory @pd to the page table @pt */
-static void gen6_write_pde(struct i915_page_directory_entry *pd,
- const int pde, struct i915_page_table_entry *pt)
+static void gen6_write_pde(struct i915_page_directory *pd,
+ const int pde, struct i915_page_table *pt)
{
/* Caller needs to make sure the write completes if necessary */
struct i915_hw_ppgtt *ppgtt =
container_of(pd, struct i915_hw_ppgtt, pd);
u32 pd_entry;
- pd_entry = GEN6_PDE_ADDR_ENCODE(pt->daddr);
+ pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
pd_entry |= GEN6_PDE_VALID;
writel(pd_entry, ppgtt->pd_addr + pde);
@@ -931,10 +1625,10 @@ static void gen6_write_pde(struct i915_page_directory_entry *pd,
/* Write all the page tables found in the ppgtt structure to incrementing page
* directories. */
static void gen6_write_page_range(struct drm_i915_private *dev_priv,
- struct i915_page_directory_entry *pd,
+ struct i915_page_directory *pd,
uint32_t start, uint32_t length)
{
- struct i915_page_table_entry *pt;
+ struct i915_page_table *pt;
uint32_t pde, temp;
gen6_for_each_pde(pt, pd, start, length, temp, pde)
@@ -947,22 +1641,23 @@ static void gen6_write_page_range(struct drm_i915_private *dev_priv,
static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
- BUG_ON(ppgtt->pd.pd_offset & 0x3f);
+ BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
- return (ppgtt->pd.pd_offset / 64) << 16;
+ return (ppgtt->pd.base.ggtt_offset / 64) << 16;
}
static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *ring)
+ struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
/* NB: TLBs must be flushed and invalidated before a switch */
- ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+ ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
- ret = intel_ring_begin(ring, 6);
+ ret = intel_ring_begin(req, 6);
if (ret)
return ret;
@@ -978,8 +1673,9 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
}
static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *ring)
+ struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
@@ -988,16 +1684,17 @@ static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
}
static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *ring)
+ struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
int ret;
/* NB: TLBs must be flushed and invalidated before a switch */
- ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+ ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
- ret = intel_ring_begin(ring, 6);
+ ret = intel_ring_begin(req, 6);
if (ret)
return ret;
@@ -1011,7 +1708,7 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
/* XXX: RCS is the only one to auto invalidate the TLBs? */
if (ring->id != RCS) {
- ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+ ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
}
@@ -1020,8 +1717,9 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
}
static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
- struct intel_engine_cs *ring)
+ struct drm_i915_gem_request *req)
{
+ struct intel_engine_cs *ring = req->ring;
struct drm_device *dev = ppgtt->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1041,8 +1739,9 @@ static void gen8_ppgtt_enable(struct drm_device *dev)
int j;
for_each_ring(ring, dev_priv, j) {
+ u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
I915_WRITE(RING_MODE_GEN7(ring),
- _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+ _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
}
}
@@ -1105,19 +1804,20 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
unsigned first_pte = first_entry % GEN6_PTES;
unsigned last_pte, i;
- scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true, 0);
+ scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
+ I915_CACHE_LLC, true, 0);
while (num_entries) {
last_pte = first_pte + num_entries;
if (last_pte > GEN6_PTES)
last_pte = GEN6_PTES;
- pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page);
+ pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
for (i = first_pte; i < last_pte; i++)
pt_vaddr[i] = scratch_pte;
- kunmap_atomic(pt_vaddr);
+ kunmap_px(ppgtt, pt_vaddr);
num_entries -= last_pte - first_pte;
first_pte = 0;
@@ -1141,66 +1841,41 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
pt_vaddr = NULL;
for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
if (pt_vaddr == NULL)
- pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page);
+ pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
pt_vaddr[act_pte] =
vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
cache_level, true, flags);
if (++act_pte == GEN6_PTES) {
- kunmap_atomic(pt_vaddr);
+ kunmap_px(ppgtt, pt_vaddr);
pt_vaddr = NULL;
act_pt++;
act_pte = 0;
}
}
if (pt_vaddr)
- kunmap_atomic(pt_vaddr);
-}
-
-/* PDE TLBs are a pain invalidate pre GEN8. It requires a context reload. If we
- * are switching between contexts with the same LRCA, we also must do a force
- * restore.
- */
-static inline void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
-{
- /* If current vm != vm, */
- ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
-}
-
-static void gen6_initialize_pt(struct i915_address_space *vm,
- struct i915_page_table_entry *pt)
-{
- gen6_pte_t *pt_vaddr, scratch_pte;
- int i;
-
- WARN_ON(vm->scratch.addr == 0);
-
- scratch_pte = vm->pte_encode(vm->scratch.addr,
- I915_CACHE_LLC, true, 0);
-
- pt_vaddr = kmap_atomic(pt->page);
-
- for (i = 0; i < GEN6_PTES; i++)
- pt_vaddr[i] = scratch_pte;
-
- kunmap_atomic(pt_vaddr);
+ kunmap_px(ppgtt, pt_vaddr);
}
static int gen6_alloc_va_range(struct i915_address_space *vm,
- uint64_t start, uint64_t length)
+ uint64_t start_in, uint64_t length_in)
{
DECLARE_BITMAP(new_page_tables, I915_PDES);
struct drm_device *dev = vm->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
- struct i915_page_table_entry *pt;
- const uint32_t start_save = start, length_save = length;
+ struct i915_page_table *pt;
+ uint32_t start, length, start_save, length_save;
uint32_t pde, temp;
int ret;
- WARN_ON(upper_32_bits(start));
+ if (WARN_ON(start_in + length_in > ppgtt->base.total))
+ return -ENODEV;
+
+ start = start_save = start_in;
+ length = length_save = length_in;
bitmap_zero(new_page_tables, I915_PDES);
@@ -1210,7 +1885,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
* tables.
*/
gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
- if (pt != ppgtt->scratch_pt) {
+ if (pt != vm->scratch_pt) {
WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
continue;
}
@@ -1218,7 +1893,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
/* We've already allocated a page table */
WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
- pt = alloc_pt_single(dev);
+ pt = alloc_pt(dev);
if (IS_ERR(pt)) {
ret = PTR_ERR(pt);
goto unwind_out;
@@ -1227,7 +1902,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
gen6_initialize_pt(vm, pt);
ppgtt->pd.page_table[pde] = pt;
- set_bit(pde, new_page_tables);
+ __set_bit(pde, new_page_tables);
trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
}
@@ -1241,7 +1916,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
bitmap_set(tmp_bitmap, gen6_pte_index(start),
gen6_pte_count(start, length));
- if (test_and_clear_bit(pde, new_page_tables))
+ if (__test_and_clear_bit(pde, new_page_tables))
gen6_write_pde(&ppgtt->pd, pde, pt);
trace_i915_page_table_entry_map(vm, pde, pt,
@@ -1263,43 +1938,63 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
unwind_out:
for_each_set_bit(pde, new_page_tables, I915_PDES) {
- struct i915_page_table_entry *pt = ppgtt->pd.page_table[pde];
+ struct i915_page_table *pt = ppgtt->pd.page_table[pde];
- ppgtt->pd.page_table[pde] = ppgtt->scratch_pt;
- unmap_and_free_pt(pt, vm->dev);
+ ppgtt->pd.page_table[pde] = vm->scratch_pt;
+ free_pt(vm->dev, pt);
}
mark_tlbs_dirty(ppgtt);
return ret;
}
-static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
+static int gen6_init_scratch(struct i915_address_space *vm)
{
- int i;
+ struct drm_device *dev = vm->dev;
- for (i = 0; i < ppgtt->num_pd_entries; i++) {
- struct i915_page_table_entry *pt = ppgtt->pd.page_table[i];
+ vm->scratch_page = alloc_scratch_page(dev);
+ if (IS_ERR(vm->scratch_page))
+ return PTR_ERR(vm->scratch_page);
- if (pt != ppgtt->scratch_pt)
- unmap_and_free_pt(ppgtt->pd.page_table[i], ppgtt->base.dev);
+ vm->scratch_pt = alloc_pt(dev);
+ if (IS_ERR(vm->scratch_pt)) {
+ free_scratch_page(dev, vm->scratch_page);
+ return PTR_ERR(vm->scratch_pt);
}
- unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
- unmap_and_free_pd(&ppgtt->pd);
+ gen6_initialize_pt(vm, vm->scratch_pt);
+
+ return 0;
+}
+
+static void gen6_free_scratch(struct i915_address_space *vm)
+{
+ struct drm_device *dev = vm->dev;
+
+ free_pt(dev, vm->scratch_pt);
+ free_scratch_page(dev, vm->scratch_page);
}
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
+ struct i915_page_table *pt;
+ uint32_t pde;
drm_mm_remove_node(&ppgtt->node);
- gen6_ppgtt_free(ppgtt);
+ gen6_for_all_pdes(pt, ppgtt, pde) {
+ if (pt != vm->scratch_pt)
+ free_pt(ppgtt->base.dev, pt);
+ }
+
+ gen6_free_scratch(vm);
}
static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
{
+ struct i915_address_space *vm = &ppgtt->base;
struct drm_device *dev = ppgtt->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
bool retried = false;
@@ -1310,11 +2005,10 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
* size. We allocate at the top of the GTT to avoid fragmentation.
*/
BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
- ppgtt->scratch_pt = alloc_pt_single(ppgtt->base.dev);
- if (IS_ERR(ppgtt->scratch_pt))
- return PTR_ERR(ppgtt->scratch_pt);
- gen6_initialize_pt(&ppgtt->base, ppgtt->scratch_pt);
+ ret = gen6_init_scratch(vm);
+ if (ret)
+ return ret;
alloc:
ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
@@ -1342,11 +2036,10 @@ alloc:
if (ppgtt->node.start < dev_priv->gtt.mappable_end)
DRM_DEBUG("Forced to use aperture for PDEs\n");
- ppgtt->num_pd_entries = I915_PDES;
return 0;
err_out:
- unmap_and_free_pt(ppgtt->scratch_pt, ppgtt->base.dev);
+ gen6_free_scratch(vm);
return ret;
}
@@ -1358,14 +2051,14 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
uint64_t start, uint64_t length)
{
- struct i915_page_table_entry *unused;
+ struct i915_page_table *unused;
uint32_t pde, temp;
gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
- ppgtt->pd.page_table[pde] = ppgtt->scratch_pt;
+ ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
}
-static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
+static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
struct drm_device *dev = ppgtt->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1388,35 +2081,23 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
if (ret)
return ret;
- if (aliasing) {
- /* preallocate all pts */
- ret = alloc_pt_range(&ppgtt->pd, 0, ppgtt->num_pd_entries,
- ppgtt->base.dev);
-
- if (ret) {
- gen6_ppgtt_cleanup(&ppgtt->base);
- return ret;
- }
- }
-
ppgtt->base.allocate_va_range = gen6_alloc_va_range;
ppgtt->base.clear_range = gen6_ppgtt_clear_range;
ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
+ ppgtt->base.unbind_vma = ppgtt_unbind_vma;
+ ppgtt->base.bind_vma = ppgtt_bind_vma;
ppgtt->base.cleanup = gen6_ppgtt_cleanup;
ppgtt->base.start = 0;
- ppgtt->base.total = ppgtt->num_pd_entries * GEN6_PTES * PAGE_SIZE;
+ ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
ppgtt->debug_dump = gen6_dump_ppgtt;
- ppgtt->pd.pd_offset =
+ ppgtt->pd.base.ggtt_offset =
ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
- ppgtt->pd.pd_offset / sizeof(gen6_pte_t);
+ ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
- if (aliasing)
- ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
- else
- gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
+ gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
@@ -1425,35 +2106,40 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt, bool aliasing)
ppgtt->node.start / PAGE_SIZE);
DRM_DEBUG("Adding PPGTT at offset %x\n",
- ppgtt->pd.pd_offset << 10);
+ ppgtt->pd.base.ggtt_offset << 10);
return 0;
}
-static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt,
- bool aliasing)
+static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
-
ppgtt->base.dev = dev;
- ppgtt->base.scratch = dev_priv->gtt.base.scratch;
if (INTEL_INFO(dev)->gen < 8)
- return gen6_ppgtt_init(ppgtt, aliasing);
+ return gen6_ppgtt_init(ppgtt);
else
- return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
+ return gen8_ppgtt_init(ppgtt);
+}
+
+static void i915_address_space_init(struct i915_address_space *vm,
+ struct drm_i915_private *dev_priv)
+{
+ drm_mm_init(&vm->mm, vm->start, vm->total);
+ vm->dev = dev_priv->dev;
+ INIT_LIST_HEAD(&vm->active_list);
+ INIT_LIST_HEAD(&vm->inactive_list);
+ list_add_tail(&vm->global_link, &dev_priv->vm_list);
}
+
int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int ret = 0;
- ret = __hw_ppgtt_init(dev, ppgtt, false);
+ ret = __hw_ppgtt_init(dev, ppgtt);
if (ret == 0) {
kref_init(&ppgtt->ref);
- drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
- ppgtt->base.total);
- i915_init_vm(dev_priv, &ppgtt->base);
+ i915_address_space_init(&ppgtt->base, dev_priv);
}
return ret;
@@ -1461,11 +2147,6 @@ int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
int i915_ppgtt_init_hw(struct drm_device *dev)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct intel_engine_cs *ring;
- struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
- int i, ret = 0;
-
/* In the case of execlists, PPGTT is enabled by the context descriptor
* and the PDPs are contained within the context itself. We don't
* need to do anything here. */
@@ -1484,16 +2165,23 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
else
MISSING_CASE(INTEL_INFO(dev)->gen);
- if (ppgtt) {
- for_each_ring(ring, dev_priv, i) {
- ret = ppgtt->switch_mm(ppgtt, ring);
- if (ret != 0)
- return ret;
- }
- }
+ return 0;
+}
- return ret;
+int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
+{
+ struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
+ struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+
+ if (i915.enable_execlists)
+ return 0;
+
+ if (!ppgtt)
+ return 0;
+
+ return ppgtt->switch_mm(ppgtt, req);
}
+
struct i915_hw_ppgtt *
i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
{
@@ -1535,32 +2223,11 @@ void i915_ppgtt_release(struct kref *kref)
kfree(ppgtt);
}
-static void
-ppgtt_bind_vma(struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
-{
- /* Currently applicable only to VLV */
- if (vma->obj->gt_ro)
- flags |= PTE_READ_ONLY;
-
- vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
- cache_level, flags);
-}
-
-static void ppgtt_unbind_vma(struct i915_vma *vma)
-{
- vma->vm->clear_range(vma->vm,
- vma->node.start,
- vma->obj->base.size,
- true);
-}
-
extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require require idling the GPU before
* unmapping anything from the GTT when VT-d is enabled.
*/
-static inline bool needs_idle_maps(struct drm_device *dev)
+static bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
/* Query intel_iommu to see if we need the workaround. Presumably that
@@ -1653,72 +2320,8 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
i915_ggtt_flush(dev_priv);
}
-void i915_gem_restore_gtt_mappings(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct drm_i915_gem_object *obj;
- struct i915_address_space *vm;
-
- i915_check_and_clear_faults(dev);
-
- /* First fill our portion of the GTT with scratch pages */
- dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
- dev_priv->gtt.base.start,
- dev_priv->gtt.base.total,
- true);
-
- list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- struct i915_vma *vma = i915_gem_obj_to_vma(obj,
- &dev_priv->gtt.base);
- if (!vma)
- continue;
-
- i915_gem_clflush_object(obj, obj->pin_display);
- /* The bind_vma code tries to be smart about tracking mappings.
- * Unfortunately above, we've just wiped out the mappings
- * without telling our object about it. So we need to fake it.
- *
- * Bind is not expected to fail since this is only called on
- * resume and assumption is all requirements exist already.
- */
- vma->bound &= ~GLOBAL_BIND;
- WARN_ON(i915_vma_bind(vma, obj->cache_level, GLOBAL_BIND));
- }
-
-
- if (INTEL_INFO(dev)->gen >= 8) {
- if (IS_CHERRYVIEW(dev))
- chv_setup_private_ppat(dev_priv);
- else
- bdw_setup_private_ppat(dev_priv);
-
- return;
- }
-
- if (USES_PPGTT(dev)) {
- list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
- /* TODO: Perhaps it shouldn't be gen6 specific */
-
- struct i915_hw_ppgtt *ppgtt =
- container_of(vm, struct i915_hw_ppgtt,
- base);
-
- if (i915_is_ggtt(vm))
- ppgtt = dev_priv->mm.aliasing_ppgtt;
-
- gen6_write_page_range(dev_priv, &ppgtt->pd,
- 0, ppgtt->base.total);
- }
- }
-
- i915_ggtt_flush(dev_priv);
-}
-
int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
- if (obj->has_dma_mapping)
- return 0;
-
if (!dma_map_sg(&obj->base.dev->pdev->dev,
obj->pages->sgl, obj->pages->nents,
PCI_DMA_BIDIRECTIONAL))
@@ -1727,7 +2330,7 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
return 0;
}
-static inline void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
+static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
#ifdef writeq
writeq(pte, addr);
@@ -1839,7 +2442,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
first_entry, num_entries, max_entries))
num_entries = max_entries;
- scratch_pte = gen8_pte_encode(vm->scratch.addr,
+ scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
I915_CACHE_LLC,
use_scratch);
for (i = 0; i < num_entries; i++)
@@ -1865,25 +2468,24 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
first_entry, num_entries, max_entries))
num_entries = max_entries;
- scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch, 0);
+ scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
+ I915_CACHE_LLC, use_scratch, 0);
for (i = 0; i < num_entries; i++)
iowrite32(scratch_pte, &gtt_base[i]);
readl(gtt_base);
}
-
-static void i915_ggtt_bind_vma(struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 unused)
+static void i915_ggtt_insert_entries(struct i915_address_space *vm,
+ struct sg_table *pages,
+ uint64_t start,
+ enum i915_cache_level cache_level, u32 unused)
{
- const unsigned long entry = vma->node.start >> PAGE_SHIFT;
unsigned int flags = (cache_level == I915_CACHE_NONE) ?
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
- BUG_ON(!i915_is_ggtt(vma->vm));
- intel_gtt_insert_sg_entries(vma->ggtt_view.pages, entry, flags);
- vma->bound = GLOBAL_BIND;
+ intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
+
}
static void i915_ggtt_clear_range(struct i915_address_space *vm,
@@ -1896,62 +2498,71 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
intel_gtt_clear_range(first_entry, num_entries);
}
-static void i915_ggtt_unbind_vma(struct i915_vma *vma)
+static int ggtt_bind_vma(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
- const unsigned int first = vma->node.start >> PAGE_SHIFT;
- const unsigned int size = vma->obj->base.size >> PAGE_SHIFT;
+ struct drm_i915_gem_object *obj = vma->obj;
+ u32 pte_flags = 0;
+ int ret;
+
+ ret = i915_get_ggtt_vma_pages(vma);
+ if (ret)
+ return ret;
+
+ /* Currently applicable only to VLV */
+ if (obj->gt_ro)
+ pte_flags |= PTE_READ_ONLY;
+
+ vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
+ vma->node.start,
+ cache_level, pte_flags);
+
+ /*
+ * Without aliasing PPGTT there's no difference between
+ * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
+ * upgrade to both bound if we bind either to avoid double-binding.
+ */
+ vma->bound |= GLOBAL_BIND | LOCAL_BIND;
- BUG_ON(!i915_is_ggtt(vma->vm));
- vma->bound = 0;
- intel_gtt_clear_range(first, size);
+ return 0;
}
-static void ggtt_bind_vma(struct i915_vma *vma,
- enum i915_cache_level cache_level,
- u32 flags)
+static int aliasing_gtt_bind_vma(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
{
struct drm_device *dev = vma->vm->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj = vma->obj;
struct sg_table *pages = obj->pages;
+ u32 pte_flags = 0;
+ int ret;
+
+ ret = i915_get_ggtt_vma_pages(vma);
+ if (ret)
+ return ret;
+ pages = vma->ggtt_view.pages;
/* Currently applicable only to VLV */
if (obj->gt_ro)
- flags |= PTE_READ_ONLY;
+ pte_flags |= PTE_READ_ONLY;
- if (i915_is_ggtt(vma->vm))
- pages = vma->ggtt_view.pages;
- /* If there is no aliasing PPGTT, or the caller needs a global mapping,
- * or we have a global mapping already but the cacheability flags have
- * changed, set the global PTEs.
- *
- * If there is an aliasing PPGTT it is anecdotally faster, so use that
- * instead if none of the above hold true.
- *
- * NB: A global mapping should only be needed for special regions like
- * "gtt mappable", SNB errata, or if specified via special execbuf
- * flags. At all other times, the GPU will use the aliasing PPGTT.
- */
- if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
- if (!(vma->bound & GLOBAL_BIND) ||
- (cache_level != obj->cache_level)) {
- vma->vm->insert_entries(vma->vm, pages,
- vma->node.start,
- cache_level, flags);
- vma->bound |= GLOBAL_BIND;
- }
+ if (flags & GLOBAL_BIND) {
+ vma->vm->insert_entries(vma->vm, pages,
+ vma->node.start,
+ cache_level, pte_flags);
}
- if (dev_priv->mm.aliasing_ppgtt &&
- (!(vma->bound & LOCAL_BIND) ||
- (cache_level != obj->cache_level))) {
+ if (flags & LOCAL_BIND) {
struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
appgtt->base.insert_entries(&appgtt->base, pages,
vma->node.start,
- cache_level, flags);
- vma->bound |= LOCAL_BIND;
+ cache_level, pte_flags);
}
+
+ return 0;
}
static void ggtt_unbind_vma(struct i915_vma *vma)
@@ -1959,22 +2570,24 @@ static void ggtt_unbind_vma(struct i915_vma *vma)
struct drm_device *dev = vma->vm->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj = vma->obj;
+ const uint64_t size = min_t(uint64_t,
+ obj->base.size,
+ vma->node.size);
if (vma->bound & GLOBAL_BIND) {
vma->vm->clear_range(vma->vm,
vma->node.start,
- obj->base.size,
+ size,
true);
- vma->bound &= ~GLOBAL_BIND;
}
- if (vma->bound & LOCAL_BIND) {
+ if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
+
appgtt->base.clear_range(&appgtt->base,
vma->node.start,
- obj->base.size,
+ size,
true);
- vma->bound &= ~LOCAL_BIND;
}
}
@@ -1986,10 +2599,8 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
interruptible = do_idling(dev_priv);
- if (!obj->has_dma_mapping)
- dma_unmap_sg(&dev->pdev->dev,
- obj->pages->sgl, obj->pages->nents,
- PCI_DMA_BIDIRECTIONAL);
+ dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
+ PCI_DMA_BIDIRECTIONAL);
undo_idling(dev_priv, interruptible);
}
@@ -2012,9 +2623,9 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node,
}
static int i915_gem_setup_global_gtt(struct drm_device *dev,
- unsigned long start,
- unsigned long mappable_end,
- unsigned long end)
+ u64 start,
+ u64 mappable_end,
+ u64 end)
{
/* Let GEM Manage all of the aperture.
*
@@ -2034,11 +2645,13 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
BUG_ON(mappable_end > end);
- /* Subtract the guard page ... */
- drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
+ ggtt_vm->start = start;
- dev_priv->gtt.base.start = start;
- dev_priv->gtt.base.total = end - start;
+ /* Subtract the guard page before address space initialization to
+ * shrink the range used by drm_mm */
+ ggtt_vm->total = end - start - PAGE_SIZE;
+ i915_address_space_init(ggtt_vm, dev_priv);
+ ggtt_vm->total += PAGE_SIZE;
if (intel_vgpu_active(dev)) {
ret = intel_vgt_balloon(dev);
@@ -2047,13 +2660,13 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
}
if (!HAS_LLC(dev))
- dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
+ ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;
/* Mark any preallocated objects as occupied */
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
- DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
+ DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
i915_gem_obj_ggtt_offset(obj), obj->base.size);
WARN_ON(i915_gem_obj_ggtt_bound(obj));
@@ -2063,6 +2676,8 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
return ret;
}
vma->bound |= GLOBAL_BIND;
+ __i915_vma_set_map_and_fenceable(vma);
+ list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
}
/* Clear any non-preallocated blocks */
@@ -2083,13 +2698,30 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
if (!ppgtt)
return -ENOMEM;
- ret = __hw_ppgtt_init(dev, ppgtt, true);
+ ret = __hw_ppgtt_init(dev, ppgtt);
if (ret) {
+ ppgtt->base.cleanup(&ppgtt->base);
kfree(ppgtt);
return ret;
}
+ if (ppgtt->base.allocate_va_range)
+ ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
+ ppgtt->base.total);
+ if (ret) {
+ ppgtt->base.cleanup(&ppgtt->base);
+ kfree(ppgtt);
+ return ret;
+ }
+
+ ppgtt->base.clear_range(&ppgtt->base,
+ ppgtt->base.start,
+ ppgtt->base.total,
+ true);
+
dev_priv->mm.aliasing_ppgtt = ppgtt;
+ WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
+ dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
}
return 0;
@@ -2098,7 +2730,7 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
void i915_gem_init_global_gtt(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- unsigned long gtt_size, mappable_size;
+ u64 gtt_size, mappable_size;
gtt_size = dev_priv->gtt.base.total;
mappable_size = dev_priv->gtt.mappable_end;
@@ -2128,50 +2760,14 @@ void i915_global_gtt_cleanup(struct drm_device *dev)
vm->cleanup(vm);
}
-static int setup_scratch_page(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct page *page;
- dma_addr_t dma_addr;
-
- page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
- if (page == NULL)
- return -ENOMEM;
- set_pages_uc(page, 1);
-
-#ifdef CONFIG_INTEL_IOMMU
- dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
- PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(dev->pdev, dma_addr))
- return -EINVAL;
-#else
- dma_addr = page_to_phys(page);
-#endif
- dev_priv->gtt.base.scratch.page = page;
- dev_priv->gtt.base.scratch.addr = dma_addr;
-
- return 0;
-}
-
-static void teardown_scratch_page(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct page *page = dev_priv->gtt.base.scratch.page;
-
- set_pages_wb(page, 1);
- pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- __free_page(page);
-}
-
-static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
return snb_gmch_ctl << 20;
}
-static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
+static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
@@ -2187,7 +2783,7 @@ static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
return bdw_gmch_ctl << 20;
}
-static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
+static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
gmch_ctrl &= SNB_GMCH_GGMS_MASK;
@@ -2198,14 +2794,14 @@ static inline unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
return 0;
}
-static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
+static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
return snb_gmch_ctl << 25; /* 32 MB units */
}
-static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
+static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
@@ -2246,27 +2842,40 @@ static int ggtt_probe_common(struct drm_device *dev,
size_t gtt_size)
{
struct drm_i915_private *dev_priv = dev->dev_private;
+ struct i915_page_scratch *scratch_page;
phys_addr_t gtt_phys_addr;
- int ret;
/* For Modern GENs the PTEs and register space are split in the BAR */
gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
(pci_resource_len(dev->pdev, 0) / 2);
- dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
+ /*
+ * On BXT writes larger than 64 bit to the GTT pagetable range will be
+ * dropped. For WC mappings in general we have 64 byte burst writes
+ * when the WC buffer is flushed, so we can't use it, but have to
+ * resort to an uncached mapping. The WC issue is easily caught by the
+ * readback check when writing GTT PTE entries.
+ */
+ if (IS_BROXTON(dev))
+ dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
+ else
+ dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
if (!dev_priv->gtt.gsm) {
DRM_ERROR("Failed to map the gtt page table\n");
return -ENOMEM;
}
- ret = setup_scratch_page(dev);
- if (ret) {
+ scratch_page = alloc_scratch_page(dev);
+ if (IS_ERR(scratch_page)) {
DRM_ERROR("Scratch setup failed\n");
/* iounmap will also get called at remove, but meh */
iounmap(dev_priv->gtt.gsm);
+ return PTR_ERR(scratch_page);
}
- return ret;
+ dev_priv->gtt.base.scratch_page = scratch_page;
+
+ return 0;
}
/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
@@ -2303,8 +2912,8 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
* write would work. */
- I915_WRITE(GEN8_PRIVATE_PAT, pat);
- I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
+ I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
+ I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
}
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
@@ -2338,18 +2947,18 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
GEN8_PPAT(6, CHV_PPAT_SNOOP) |
GEN8_PPAT(7, CHV_PPAT_SNOOP);
- I915_WRITE(GEN8_PRIVATE_PAT, pat);
- I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
+ I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
+ I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
}
static int gen8_gmch_probe(struct drm_device *dev,
- size_t *gtt_total,
+ u64 *gtt_total,
size_t *stolen,
phys_addr_t *mappable_base,
- unsigned long *mappable_end)
+ u64 *mappable_end)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- unsigned int gtt_size;
+ u64 gtt_size;
u16 snb_gmch_ctl;
int ret;
@@ -2375,7 +2984,7 @@ static int gen8_gmch_probe(struct drm_device *dev,
*gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
- if (IS_CHERRYVIEW(dev))
+ if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
chv_setup_private_ppat(dev_priv);
else
bdw_setup_private_ppat(dev_priv);
@@ -2384,15 +2993,17 @@ static int gen8_gmch_probe(struct drm_device *dev,
dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
+ dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
+ dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
return ret;
}
static int gen6_gmch_probe(struct drm_device *dev,
- size_t *gtt_total,
+ u64 *gtt_total,
size_t *stolen,
phys_addr_t *mappable_base,
- unsigned long *mappable_end)
+ u64 *mappable_end)
{
struct drm_i915_private *dev_priv = dev->dev_private;
unsigned int gtt_size;
@@ -2406,7 +3017,7 @@ static int gen6_gmch_probe(struct drm_device *dev,
* a coarse sanity check.
*/
if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
- DRM_ERROR("Unknown GMADR size (%lx)\n",
+ DRM_ERROR("Unknown GMADR size (%llx)\n",
dev_priv->gtt.mappable_end);
return -ENXIO;
}
@@ -2424,6 +3035,8 @@ static int gen6_gmch_probe(struct drm_device *dev,
dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
+ dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
+ dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
return ret;
}
@@ -2434,14 +3047,14 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
iounmap(gtt->gsm);
- teardown_scratch_page(vm->dev);
+ free_scratch_page(vm->dev, vm->scratch_page);
}
static int i915_gmch_probe(struct drm_device *dev,
- size_t *gtt_total,
+ u64 *gtt_total,
size_t *stolen,
phys_addr_t *mappable_base,
- unsigned long *mappable_end)
+ u64 *mappable_end)
{
struct drm_i915_private *dev_priv = dev->dev_private;
int ret;
@@ -2455,7 +3068,10 @@ static int i915_gmch_probe(struct drm_device *dev,
intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
+ dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
+ dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
+ dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
if (unlikely(dev_priv->gtt.do_idle_maps))
DRM_INFO("applying Ironlake quirks for intel_iommu\n");
@@ -2495,17 +3111,17 @@ int i915_gem_gtt_init(struct drm_device *dev)
dev_priv->gtt.base.cleanup = gen6_gmch_remove;
}
+ gtt->base.dev = dev;
+
ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
&gtt->mappable_base, &gtt->mappable_end);
if (ret)
return ret;
- gtt->base.dev = dev;
-
/* GMADR is the PCI mmio aperture into the global GTT. */
- DRM_INFO("Memory usable by graphics device = %zdM\n",
+ DRM_INFO("Memory usable by graphics device = %lluM\n",
gtt->base.total >> 20);
- DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
+ DRM_DEBUG_DRIVER("GMADR size = %lldM\n", gtt->mappable_end >> 20);
DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
#ifdef CONFIG_INTEL_IOMMU
if (intel_iommu_gfx_mapped)
@@ -2523,6 +3139,68 @@ int i915_gem_gtt_init(struct drm_device *dev)
return 0;
}
+void i915_gem_restore_gtt_mappings(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_gem_object *obj;
+ struct i915_address_space *vm;
+ struct i915_vma *vma;
+ bool flush;
+
+ i915_check_and_clear_faults(dev);
+
+ /* First fill our portion of the GTT with scratch pages */
+ dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+ dev_priv->gtt.base.start,
+ dev_priv->gtt.base.total,
+ true);
+
+ /* Cache flush objects bound into GGTT and rebind them. */
+ vm = &dev_priv->gtt.base;
+ list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+ flush = false;
+ list_for_each_entry(vma, &obj->vma_list, vma_link) {
+ if (vma->vm != vm)
+ continue;
+
+ WARN_ON(i915_vma_bind(vma, obj->cache_level,
+ PIN_UPDATE));
+
+ flush = true;
+ }
+
+ if (flush)
+ i915_gem_clflush_object(obj, obj->pin_display);
+ }
+
+ if (INTEL_INFO(dev)->gen >= 8) {
+ if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
+ chv_setup_private_ppat(dev_priv);
+ else
+ bdw_setup_private_ppat(dev_priv);
+
+ return;
+ }
+
+ if (USES_PPGTT(dev)) {
+ list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+ /* TODO: Perhaps it shouldn't be gen6 specific */
+
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt,
+ base);
+
+ if (i915_is_ggtt(vm))
+ ppgtt = dev_priv->mm.aliasing_ppgtt;
+
+ gen6_write_page_range(dev_priv, &ppgtt->pd,
+ 0, ppgtt->base.total);
+ }
+ }
+
+ i915_ggtt_flush(dev_priv);
+}
+
static struct i915_vma *
__i915_gem_vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -2532,7 +3210,8 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
return ERR_PTR(-EINVAL);
- vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+
+ vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
if (vma == NULL)
return ERR_PTR(-ENOMEM);
@@ -2542,22 +3221,8 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj,
vma->vm = vm;
vma->obj = obj;
- if (INTEL_INFO(vm->dev)->gen >= 6) {
- if (i915_is_ggtt(vm)) {
- vma->ggtt_view = *ggtt_view;
-
- vma->unbind_vma = ggtt_unbind_vma;
- vma->bind_vma = ggtt_bind_vma;
- } else {
- vma->unbind_vma = ppgtt_unbind_vma;
- vma->bind_vma = ppgtt_bind_vma;
- }
- } else {
- BUG_ON(!i915_is_ggtt(vm));
+ if (i915_is_ggtt(vm))
vma->ggtt_view = *ggtt_view;
- vma->unbind_vma = i915_ggtt_unbind_vma;
- vma->bind_vma = i915_ggtt_bind_vma;
- }
list_add_tail(&vma->vma_link, &obj->vma_list);
if (!i915_is_ggtt(vm))
@@ -2602,15 +3267,18 @@ i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
}
-static void
-rotate_pages(dma_addr_t *in, unsigned int width, unsigned int height,
- struct sg_table *st)
+static struct scatterlist *
+rotate_pages(dma_addr_t *in, unsigned int offset,
+ unsigned int width, unsigned int height,
+ struct sg_table *st, struct scatterlist *sg)
{
unsigned int column, row;
unsigned int src_idx;
- struct scatterlist *sg = st->sgl;
- st->nents = 0;
+ if (!sg) {
+ st->nents = 0;
+ sg = st->sgl;
+ }
for (column = 0; column < width; column++) {
src_idx = width * (height - 1) + column;
@@ -2621,51 +3289,49 @@ rotate_pages(dma_addr_t *in, unsigned int width, unsigned int height,
* The only thing we need are DMA addresses.
*/
sg_set_page(sg, NULL, PAGE_SIZE, 0);
- sg_dma_address(sg) = in[src_idx];
+ sg_dma_address(sg) = in[offset + src_idx];
sg_dma_len(sg) = PAGE_SIZE;
sg = sg_next(sg);
src_idx -= width;
}
}
+
+ return sg;
}
static struct sg_table *
intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
struct drm_i915_gem_object *obj)
{
- struct drm_device *dev = obj->base.dev;
struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
- unsigned long size, pages, rot_pages;
+ unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
+ unsigned int size_pages_uv;
struct sg_page_iter sg_iter;
unsigned long i;
dma_addr_t *page_addr_list;
struct sg_table *st;
- unsigned int tile_pitch, tile_height;
- unsigned int width_pages, height_pages;
+ unsigned int uv_start_page;
+ struct scatterlist *sg;
int ret = -ENOMEM;
- pages = obj->base.size / PAGE_SIZE;
-
- /* Calculate tiling geometry. */
- tile_height = intel_tile_height(dev, rot_info->pixel_format,
- rot_info->fb_modifier);
- tile_pitch = PAGE_SIZE / tile_height;
- width_pages = DIV_ROUND_UP(rot_info->pitch, tile_pitch);
- height_pages = DIV_ROUND_UP(rot_info->height, tile_height);
- rot_pages = width_pages * height_pages;
- size = rot_pages * PAGE_SIZE;
-
/* Allocate a temporary list of source pages for random access. */
- page_addr_list = drm_malloc_ab(pages, sizeof(dma_addr_t));
+ page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
+ sizeof(dma_addr_t));
if (!page_addr_list)
return ERR_PTR(ret);
+ /* Account for UV plane with NV12. */
+ if (rot_info->pixel_format == DRM_FORMAT_NV12)
+ size_pages_uv = rot_info->size_uv >> PAGE_SHIFT;
+ else
+ size_pages_uv = 0;
+
/* Allocate target SG list. */
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st)
goto err_st_alloc;
- ret = sg_alloc_table(st, rot_pages, GFP_KERNEL);
+ ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
if (ret)
goto err_sg_alloc;
@@ -2677,13 +3343,32 @@ intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
}
/* Rotate the pages. */
- rotate_pages(page_addr_list, width_pages, height_pages, st);
+ sg = rotate_pages(page_addr_list, 0,
+ rot_info->width_pages, rot_info->height_pages,
+ st, NULL);
+
+ /* Append the UV plane if NV12. */
+ if (rot_info->pixel_format == DRM_FORMAT_NV12) {
+ uv_start_page = size_pages;
+
+ /* Check for tile-row un-alignment. */
+ if (offset_in_page(rot_info->uv_offset))
+ uv_start_page--;
+
+ rot_info->uv_start_page = uv_start_page;
+
+ rotate_pages(page_addr_list, uv_start_page,
+ rot_info->width_pages_uv,
+ rot_info->height_pages_uv,
+ st, sg);
+ }
DRM_DEBUG_KMS(
- "Created rotated page mapping for object size %lu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %lu pages).\n",
- size, rot_info->pitch, rot_info->height,
- rot_info->pixel_format, width_pages, height_pages,
- rot_pages);
+ "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n",
+ obj->base.size, rot_info->pitch, rot_info->height,
+ rot_info->pixel_format, rot_info->width_pages,
+ rot_info->height_pages, size_pages + size_pages_uv,
+ size_pages);
drm_free_large(page_addr_list);
@@ -2695,14 +3380,56 @@ err_st_alloc:
drm_free_large(page_addr_list);
DRM_DEBUG_KMS(
- "Failed to create rotated mapping for object size %lu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %lu pages)\n",
- size, ret, rot_info->pitch, rot_info->height,
- rot_info->pixel_format, width_pages, height_pages,
- rot_pages);
+ "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n",
+ obj->base.size, ret, rot_info->pitch, rot_info->height,
+ rot_info->pixel_format, rot_info->width_pages,
+ rot_info->height_pages, size_pages + size_pages_uv,
+ size_pages);
+ return ERR_PTR(ret);
+}
+
+static struct sg_table *
+intel_partial_pages(const struct i915_ggtt_view *view,
+ struct drm_i915_gem_object *obj)
+{
+ struct sg_table *st;
+ struct scatterlist *sg;
+ struct sg_page_iter obj_sg_iter;
+ int ret = -ENOMEM;
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ sg = st->sgl;
+ st->nents = 0;
+ for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
+ view->params.partial.offset)
+ {
+ if (st->nents >= view->params.partial.size)
+ break;
+
+ sg_set_page(sg, NULL, PAGE_SIZE, 0);
+ sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
+ sg_dma_len(sg) = PAGE_SIZE;
+
+ sg = sg_next(sg);
+ st->nents++;
+ }
+
+ return st;
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
return ERR_PTR(ret);
}
-static inline int
+static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
int ret = 0;
@@ -2715,6 +3442,9 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
vma->ggtt_view.pages =
intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
+ else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
+ vma->ggtt_view.pages =
+ intel_partial_pages(&vma->ggtt_view, vma->obj);
else
WARN_ONCE(1, "GGTT view %u not implemented!\n",
vma->ggtt_view.type);
@@ -2746,14 +3476,70 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
u32 flags)
{
- if (i915_is_ggtt(vma->vm)) {
- int ret = i915_get_ggtt_vma_pages(vma);
+ int ret;
+ u32 bind_flags;
+ if (WARN_ON(flags == 0))
+ return -EINVAL;
+
+ bind_flags = 0;
+ if (flags & PIN_GLOBAL)
+ bind_flags |= GLOBAL_BIND;
+ if (flags & PIN_USER)
+ bind_flags |= LOCAL_BIND;
+
+ if (flags & PIN_UPDATE)
+ bind_flags |= vma->bound;
+ else
+ bind_flags &= ~vma->bound;
+
+ if (bind_flags == 0)
+ return 0;
+
+ if (vma->bound == 0 && vma->vm->allocate_va_range) {
+ trace_i915_va_alloc(vma->vm,
+ vma->node.start,
+ vma->node.size,
+ VM_TO_TRACE_NAME(vma->vm));
+
+ /* XXX: i915_vma_pin() will fix this +- hack */
+ vma->pin_count++;
+ ret = vma->vm->allocate_va_range(vma->vm,
+ vma->node.start,
+ vma->node.size);
+ vma->pin_count--;
if (ret)
return ret;
}
- vma->bind_vma(vma, cache_level, flags);
+ ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
+ if (ret)
+ return ret;
+
+ vma->bound |= bind_flags;
return 0;
}
+
+/**
+ * i915_ggtt_view_size - Get the size of a GGTT view.
+ * @obj: Object the view is of.
+ * @view: The view in question.
+ *
+ * @return The size of the GGTT view in bytes.
+ */
+size_t
+i915_ggtt_view_size(struct drm_i915_gem_object *obj,
+ const struct i915_ggtt_view *view)
+{
+ if (view->type == I915_GGTT_VIEW_NORMAL) {
+ return obj->base.size;
+ } else if (view->type == I915_GGTT_VIEW_ROTATED) {
+ return view->rotation_info.size;
+ } else if (view->type == I915_GGTT_VIEW_PARTIAL) {
+ return view->params.partial.size << PAGE_SHIFT;
+ } else {
+ WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
+ return obj->base.size;
+ }
+}