summaryrefslogtreecommitdiffstats
path: root/kernel/arch/arm64/mm
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/arch/arm64/mm
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/arch/arm64/mm')
-rw-r--r--kernel/arch/arm64/mm/Makefile3
-rw-r--r--kernel/arch/arm64/mm/cache.S92
-rw-r--r--kernel/arch/arm64/mm/context.c262
-rw-r--r--kernel/arch/arm64/mm/dma-mapping.c605
-rw-r--r--kernel/arch/arm64/mm/dump.c18
-rw-r--r--kernel/arch/arm64/mm/fault.c69
-rw-r--r--kernel/arch/arm64/mm/flush.c5
-rw-r--r--kernel/arch/arm64/mm/hugetlbpage.c11
-rw-r--r--kernel/arch/arm64/mm/init.c27
-rw-r--r--kernel/arch/arm64/mm/kasan_init.c165
-rw-r--r--kernel/arch/arm64/mm/mmu.c150
-rw-r--r--kernel/arch/arm64/mm/pageattr.c5
-rw-r--r--kernel/arch/arm64/mm/pgd.c2
-rw-r--r--kernel/arch/arm64/mm/proc-macros.S12
-rw-r--r--kernel/arch/arm64/mm/proc.S83
15 files changed, 1145 insertions, 364 deletions
diff --git a/kernel/arch/arm64/mm/Makefile b/kernel/arch/arm64/mm/Makefile
index 773d37a14..57f57fde5 100644
--- a/kernel/arch/arm64/mm/Makefile
+++ b/kernel/arch/arm64/mm/Makefile
@@ -4,3 +4,6 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_ARM64_PTDUMP) += dump.o
+
+obj-$(CONFIG_KASAN) += kasan_init.o
+KASAN_SANITIZE_kasan_init.o := n
diff --git a/kernel/arch/arm64/mm/cache.S b/kernel/arch/arm64/mm/cache.S
index 2560e1e15..cfa44a6ad 100644
--- a/kernel/arch/arm64/mm/cache.S
+++ b/kernel/arch/arm64/mm/cache.S
@@ -22,84 +22,11 @@
#include <linux/init.h>
#include <asm/assembler.h>
#include <asm/cpufeature.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
#include "proc-macros.S"
/*
- * __flush_dcache_all()
- *
- * Flush the whole D-cache.
- *
- * Corrupted registers: x0-x7, x9-x11
- */
-__flush_dcache_all:
- dmb sy // ensure ordering with previous memory accesses
- mrs x0, clidr_el1 // read clidr
- and x3, x0, #0x7000000 // extract loc from clidr
- lsr x3, x3, #23 // left align loc bit field
- cbz x3, finished // if loc is 0, then no need to clean
- mov x10, #0 // start clean at cache level 0
-loop1:
- add x2, x10, x10, lsr #1 // work out 3x current cache level
- lsr x1, x0, x2 // extract cache type bits from clidr
- and x1, x1, #7 // mask of the bits for current cache only
- cmp x1, #2 // see what cache we have at this level
- b.lt skip // skip if no cache, or just i-cache
- save_and_disable_irqs x9 // make CSSELR and CCSIDR access atomic
- msr csselr_el1, x10 // select current cache level in csselr
- isb // isb to sych the new cssr&csidr
- mrs x1, ccsidr_el1 // read the new ccsidr
- restore_irqs x9
- and x2, x1, #7 // extract the length of the cache lines
- add x2, x2, #4 // add 4 (line length offset)
- mov x4, #0x3ff
- and x4, x4, x1, lsr #3 // find maximum number on the way size
- clz w5, w4 // find bit position of way size increment
- mov x7, #0x7fff
- and x7, x7, x1, lsr #13 // extract max number of the index size
-loop2:
- mov x9, x4 // create working copy of max way size
-loop3:
- lsl x6, x9, x5
- orr x11, x10, x6 // factor way and cache number into x11
- lsl x6, x7, x2
- orr x11, x11, x6 // factor index number into x11
- dc cisw, x11 // clean & invalidate by set/way
- subs x9, x9, #1 // decrement the way
- b.ge loop3
- subs x7, x7, #1 // decrement the index
- b.ge loop2
-skip:
- add x10, x10, #2 // increment cache number
- cmp x3, x10
- b.gt loop1
-finished:
- mov x10, #0 // swith back to cache level 0
- msr csselr_el1, x10 // select current cache level in csselr
- dsb sy
- isb
- ret
-ENDPROC(__flush_dcache_all)
-
-/*
- * flush_cache_all()
- *
- * Flush the entire cache system. The data cache flush is now achieved
- * using atomic clean / invalidates working outwards from L1 cache. This
- * is done using Set/Way based cache maintainance instructions. The
- * instruction cache can still be invalidated back to the point of
- * unification in a single instruction.
- */
-ENTRY(flush_cache_all)
- mov x12, lr
- bl __flush_dcache_all
- mov x0, #0
- ic ialluis // I+BTB cache invalidate
- ret x12
-ENDPROC(flush_cache_all)
-
-/*
* flush_icache_range(start,end)
*
* Ensure that the I and D caches are coherent within specified region.
@@ -171,7 +98,7 @@ ENTRY(__flush_dcache_area)
b.lo 1b
dsb sy
ret
-ENDPROC(__flush_dcache_area)
+ENDPIPROC(__flush_dcache_area)
/*
* __inval_cache_range(start, end)
@@ -204,7 +131,7 @@ __dma_inv_range:
b.lo 2b
dsb sy
ret
-ENDPROC(__inval_cache_range)
+ENDPIPROC(__inval_cache_range)
ENDPROC(__dma_inv_range)
/*
@@ -216,7 +143,12 @@ __dma_clean_range:
dcache_line_size x2, x3
sub x3, x2, #1
bic x0, x0, x3
-1: alternative_insn "dc cvac, x0", "dc civac, x0", ARM64_WORKAROUND_CLEAN_CACHE
+1:
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+ dc cvac, x0
+alternative_else
+ dc civac, x0
+alternative_endif
add x0, x0, x2
cmp x0, x1
b.lo 1b
@@ -239,7 +171,7 @@ ENTRY(__dma_flush_range)
b.lo 1b
dsb sy
ret
-ENDPROC(__dma_flush_range)
+ENDPIPROC(__dma_flush_range)
/*
* __dma_map_area(start, size, dir)
@@ -252,7 +184,7 @@ ENTRY(__dma_map_area)
cmp w2, #DMA_FROM_DEVICE
b.eq __dma_inv_range
b __dma_clean_range
-ENDPROC(__dma_map_area)
+ENDPIPROC(__dma_map_area)
/*
* __dma_unmap_area(start, size, dir)
@@ -265,4 +197,4 @@ ENTRY(__dma_unmap_area)
cmp w2, #DMA_TO_DEVICE
b.ne __dma_inv_range
ret
-ENDPROC(__dma_unmap_area)
+ENDPIPROC(__dma_unmap_area)
diff --git a/kernel/arch/arm64/mm/context.c b/kernel/arch/arm64/mm/context.c
index 76c1e6cd3..e87f53ff5 100644
--- a/kernel/arch/arm64/mm/context.c
+++ b/kernel/arch/arm64/mm/context.c
@@ -17,151 +17,199 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include <linux/init.h>
+#include <linux/bitops.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/percpu.h>
+#include <asm/cpufeature.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
-#include <asm/cachetype.h>
-
-#define asid_bits(reg) \
- (((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
-
-#define ASID_FIRST_VERSION (1 << MAX_ASID_BITS)
+static u32 asid_bits;
static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-unsigned int cpu_last_asid = ASID_FIRST_VERSION;
-/*
- * We fork()ed a process, and we need a new context for the child to run in.
- */
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
- mm->context.id = 0;
- raw_spin_lock_init(&mm->context.id_lock);
-}
+static atomic64_t asid_generation;
+static unsigned long *asid_map;
-static void flush_context(void)
-{
- /* set the reserved TTBR0 before flushing the TLB */
- cpu_set_reserved_ttbr0();
- flush_tlb_all();
- if (icache_is_aivivt())
- __flush_icache_all();
-}
+static DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(u64, reserved_asids);
+static cpumask_t tlb_flush_pending;
-#ifdef CONFIG_SMP
+#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
+#define ASID_FIRST_VERSION (1UL << asid_bits)
+#define NUM_USER_ASIDS ASID_FIRST_VERSION
-static void set_mm_context(struct mm_struct *mm, unsigned int asid)
+static void flush_context(unsigned int cpu)
{
- unsigned long flags;
+ int i;
+ u64 asid;
+
+ /* Update the list of reserved ASIDs and the ASID bitmap. */
+ bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
/*
- * Locking needed for multi-threaded applications where the same
- * mm->context.id could be set from different CPUs during the
- * broadcast. This function is also called via IPI so the
- * mm->context.id_lock has to be IRQ-safe.
+ * Ensure the generation bump is observed before we xchg the
+ * active_asids.
*/
- raw_spin_lock_irqsave(&mm->context.id_lock, flags);
- if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+ smp_wmb();
+
+ for_each_possible_cpu(i) {
+ asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
/*
- * Old version of ASID found. Set the new one and reset
- * mm_cpumask(mm).
+ * If this CPU has already been through a
+ * rollover, but hasn't run another task in
+ * the meantime, we must preserve its reserved
+ * ASID, as this is the only trace we have of
+ * the process it is still running.
*/
- mm->context.id = asid;
- cpumask_clear(mm_cpumask(mm));
+ if (asid == 0)
+ asid = per_cpu(reserved_asids, i);
+ __set_bit(asid & ~ASID_MASK, asid_map);
+ per_cpu(reserved_asids, i) = asid;
}
- raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
- /*
- * Set the mm_cpumask(mm) bit for the current CPU.
- */
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+ /* Queue a TLB invalidate and flush the I-cache if necessary. */
+ cpumask_setall(&tlb_flush_pending);
+
+ if (icache_is_aivivt())
+ __flush_icache_all();
}
-/*
- * Reset the ASID on the current CPU. This function call is broadcast from the
- * CPU handling the ASID rollover and holding cpu_asid_lock.
- */
-static void reset_context(void *info)
+static bool check_update_reserved_asid(u64 asid, u64 newasid)
{
- unsigned int asid;
- unsigned int cpu = smp_processor_id();
- struct mm_struct *mm = current->active_mm;
+ int cpu;
+ bool hit = false;
/*
- * current->active_mm could be init_mm for the idle thread immediately
- * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to
- * the reserved value, so no need to reset any context.
+ * Iterate over the set of reserved ASIDs looking for a match.
+ * If we find one, then we can update our mm to use newasid
+ * (i.e. the same ASID in the current generation) but we can't
+ * exit the loop early, since we need to ensure that all copies
+ * of the old ASID are updated to reflect the mm. Failure to do
+ * so could result in us missing the reserved ASID in a future
+ * generation.
*/
- if (mm == &init_mm)
- return;
-
- smp_rmb();
- asid = cpu_last_asid + cpu;
-
- flush_context();
- set_mm_context(mm, asid);
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(reserved_asids, cpu) == asid) {
+ hit = true;
+ per_cpu(reserved_asids, cpu) = newasid;
+ }
+ }
- /* set the new ASID */
- cpu_switch_mm(mm->pgd, mm);
+ return hit;
}
-#else
-
-static inline void set_mm_context(struct mm_struct *mm, unsigned int asid)
+static u64 new_context(struct mm_struct *mm, unsigned int cpu)
{
- mm->context.id = asid;
- cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id()));
-}
+ static u32 cur_idx = 1;
+ u64 asid = atomic64_read(&mm->context.id);
+ u64 generation = atomic64_read(&asid_generation);
-#endif
+ if (asid != 0) {
+ u64 newasid = generation | (asid & ~ASID_MASK);
-void __new_context(struct mm_struct *mm)
-{
- unsigned int asid;
- unsigned int bits = asid_bits();
+ /*
+ * If our current ASID was active during a rollover, we
+ * can continue to use it and this was just a false alarm.
+ */
+ if (check_update_reserved_asid(asid, newasid))
+ return newasid;
- raw_spin_lock(&cpu_asid_lock);
-#ifdef CONFIG_SMP
- /*
- * Check the ASID again, in case the change was broadcast from another
- * CPU before we acquired the lock.
- */
- if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
- raw_spin_unlock(&cpu_asid_lock);
- return;
+ /*
+ * We had a valid ASID in a previous life, so try to re-use
+ * it if possible.
+ */
+ asid &= ~ASID_MASK;
+ if (!__test_and_set_bit(asid, asid_map))
+ return newasid;
}
-#endif
+
/*
- * At this point, it is guaranteed that the current mm (with an old
- * ASID) isn't active on any other CPU since the ASIDs are changed
- * simultaneously via IPI.
+ * Allocate a free ASID. If we can't find one, take a note of the
+ * currently active ASIDs and mark the TLBs as requiring flushes.
+ * We always count from ASID #1, as we use ASID #0 when setting a
+ * reserved TTBR0 for the init_mm.
*/
- asid = ++cpu_last_asid;
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+ if (asid != NUM_USER_ASIDS)
+ goto set_asid;
+
+ /* We're out of ASIDs, so increment the global generation count */
+ generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+ &asid_generation);
+ flush_context(cpu);
+
+ /* We have at least 1 ASID per CPU, so this will always succeed */
+ asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+
+set_asid:
+ __set_bit(asid, asid_map);
+ cur_idx = asid;
+ return asid | generation;
+}
+
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
+{
+ unsigned long flags;
+ u64 asid;
+
+ asid = atomic64_read(&mm->context.id);
/*
- * If we've used up all our ASIDs, we need to start a new version and
- * flush the TLB.
+ * The memory ordering here is subtle. We rely on the control
+ * dependency between the generation read and the update of
+ * active_asids to ensure that we are synchronised with a
+ * parallel rollover (i.e. this pairs with the smp_wmb() in
+ * flush_context).
*/
- if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
- /* increment the ASID version */
- cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
- if (cpu_last_asid == 0)
- cpu_last_asid = ASID_FIRST_VERSION;
- asid = cpu_last_asid + smp_processor_id();
- flush_context();
-#ifdef CONFIG_SMP
- smp_wmb();
- smp_call_function(reset_context, NULL, 1);
-#endif
- cpu_last_asid += NR_CPUS - 1;
+ if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
+ && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
+ goto switch_mm_fastpath;
+
+ raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+ /* Check that our ASID belongs to the current generation. */
+ asid = atomic64_read(&mm->context.id);
+ if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) {
+ asid = new_context(mm, cpu);
+ atomic64_set(&mm->context.id, asid);
+ }
+
+ if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+ local_flush_tlb_all();
+
+ atomic64_set(&per_cpu(active_asids, cpu), asid);
+ raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+
+switch_mm_fastpath:
+ cpu_switch_mm(mm->pgd, mm);
+}
+
+static int asids_init(void)
+{
+ int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
+
+ switch (fld) {
+ default:
+ pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld);
+ /* Fallthrough */
+ case 0:
+ asid_bits = 8;
+ break;
+ case 2:
+ asid_bits = 16;
}
- set_mm_context(mm, asid);
- raw_spin_unlock(&cpu_asid_lock);
+ /* If we end up with more CPUs than ASIDs, expect things to crash */
+ WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
+ atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+ asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
+ GFP_KERNEL);
+ if (!asid_map)
+ panic("Failed to allocate bitmap for %lu ASIDs\n",
+ NUM_USER_ASIDS);
+
+ pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
+ return 0;
}
+early_initcall(asids_init);
diff --git a/kernel/arch/arm64/mm/dma-mapping.c b/kernel/arch/arm64/mm/dma-mapping.c
index b0bd4e5fd..354144e33 100644
--- a/kernel/arch/arm64/mm/dma-mapping.c
+++ b/kernel/arch/arm64/mm/dma-mapping.c
@@ -18,6 +18,7 @@
*/
#include <linux/gfp.h>
+#include <linux/acpi.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
@@ -28,9 +29,6 @@
#include <asm/cacheflush.h>
-struct dma_map_ops *dma_ops;
-EXPORT_SYMBOL(dma_ops);
-
static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
bool coherent)
{
@@ -100,7 +98,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask <= DMA_BIT_MASK(32))
flags |= GFP_DMA;
- if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) {
+ if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
struct page *page;
void *addr;
@@ -144,10 +142,11 @@ static void *__dma_alloc(struct device *dev, size_t size,
struct page *page;
void *ptr, *coherent_ptr;
bool coherent = is_device_dma_coherent(dev);
+ pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);
size = PAGE_ALIGN(size);
- if (!coherent && !(flags & __GFP_WAIT)) {
+ if (!coherent && !gfpflags_allow_blocking(flags)) {
struct page *page = NULL;
void *addr = __alloc_from_pool(size, &page, flags);
@@ -171,9 +170,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
/* create a coherent mapping */
page = virt_to_page(ptr);
coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
- __get_dma_pgprot(attrs,
- __pgprot(PROT_NORMAL_NC), false),
- NULL);
+ prot, NULL);
if (!coherent_ptr)
goto no_map;
@@ -303,9 +300,10 @@ static void __swiotlb_sync_sg_for_device(struct device *dev,
sg->length, dir);
}
-/* vma->vm_page_prot must be set appropriately before calling this function */
-static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size)
+static int __swiotlb_mmap(struct device *dev,
+ struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_attrs *attrs)
{
int ret = -ENXIO;
unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
@@ -314,6 +312,9 @@ static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
unsigned long off = vma->vm_pgoff;
+ vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
+ is_device_dma_coherent(dev));
+
if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
@@ -327,20 +328,24 @@ static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
return ret;
}
-static int __swiotlb_mmap(struct device *dev,
- struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- struct dma_attrs *attrs)
+static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t handle, size_t size,
+ struct dma_attrs *attrs)
{
- vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
- is_device_dma_coherent(dev));
- return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+ int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+
+ if (!ret)
+ sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)),
+ PAGE_ALIGN(size), 0);
+
+ return ret;
}
static struct dma_map_ops swiotlb_dma_ops = {
.alloc = __dma_alloc,
.free = __dma_free,
.mmap = __swiotlb_mmap,
+ .get_sgtable = __swiotlb_get_sgtable,
.map_page = __swiotlb_map_page,
.unmap_page = __swiotlb_unmap_page,
.map_sg = __swiotlb_map_sg_attrs,
@@ -414,15 +419,101 @@ out:
return -ENOMEM;
}
-static int __init arm64_dma_init(void)
+/********************************************
+ * The following APIs are for dummy DMA ops *
+ ********************************************/
+
+static void *__dummy_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs)
{
- int ret;
+ return NULL;
+}
- dma_ops = &swiotlb_dma_ops;
+static void __dummy_free(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+}
- ret = atomic_pool_init();
+static int __dummy_mmap(struct device *dev,
+ struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_attrs *attrs)
+{
+ return -ENXIO;
+}
- return ret;
+static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ return DMA_ERROR_CODE;
+}
+
+static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+}
+
+static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
+ int nelems, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ return 0;
+}
+
+static void __dummy_unmap_sg(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+}
+
+static void __dummy_sync_single(struct device *dev,
+ dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir)
+{
+}
+
+static void __dummy_sync_sg(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+{
+}
+
+static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
+{
+ return 1;
+}
+
+static int __dummy_dma_supported(struct device *hwdev, u64 mask)
+{
+ return 0;
+}
+
+struct dma_map_ops dummy_dma_ops = {
+ .alloc = __dummy_alloc,
+ .free = __dummy_free,
+ .mmap = __dummy_mmap,
+ .map_page = __dummy_map_page,
+ .unmap_page = __dummy_unmap_page,
+ .map_sg = __dummy_map_sg,
+ .unmap_sg = __dummy_unmap_sg,
+ .sync_single_for_cpu = __dummy_sync_single,
+ .sync_single_for_device = __dummy_sync_single,
+ .sync_sg_for_cpu = __dummy_sync_sg,
+ .sync_sg_for_device = __dummy_sync_sg,
+ .mapping_error = __dummy_mapping_error,
+ .dma_supported = __dummy_dma_supported,
+};
+EXPORT_SYMBOL(dummy_dma_ops);
+
+static int __init arm64_dma_init(void)
+{
+ return atomic_pool_init();
}
arch_initcall(arm64_dma_init);
@@ -434,3 +525,471 @@ static int __init dma_debug_do_init(void)
return 0;
}
fs_initcall(dma_debug_do_init);
+
+
+#ifdef CONFIG_IOMMU_DMA
+#include <linux/dma-iommu.h>
+#include <linux/platform_device.h>
+#include <linux/amba/bus.h>
+
+/* Thankfully, all cache ops are by VA so we can ignore phys here */
+static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
+{
+ __dma_flush_range(virt, virt + PAGE_SIZE);
+}
+
+static void *__iommu_alloc_attrs(struct device *dev, size_t size,
+ dma_addr_t *handle, gfp_t gfp,
+ struct dma_attrs *attrs)
+{
+ bool coherent = is_device_dma_coherent(dev);
+ int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
+ size_t iosize = size;
+ void *addr;
+
+ if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
+ return NULL;
+
+ size = PAGE_ALIGN(size);
+
+ /*
+ * Some drivers rely on this, and we probably don't want the
+ * possibility of stale kernel data being read by devices anyway.
+ */
+ gfp |= __GFP_ZERO;
+
+ if (gfpflags_allow_blocking(gfp)) {
+ struct page **pages;
+ pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+
+ pages = iommu_dma_alloc(dev, iosize, gfp, ioprot, handle,
+ flush_page);
+ if (!pages)
+ return NULL;
+
+ addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
+ __builtin_return_address(0));
+ if (!addr)
+ iommu_dma_free(dev, pages, iosize, handle);
+ } else {
+ struct page *page;
+ /*
+ * In atomic context we can't remap anything, so we'll only
+ * get the virtually contiguous buffer we need by way of a
+ * physically contiguous allocation.
+ */
+ if (coherent) {
+ page = alloc_pages(gfp, get_order(size));
+ addr = page ? page_address(page) : NULL;
+ } else {
+ addr = __alloc_from_pool(size, &page, gfp);
+ }
+ if (!addr)
+ return NULL;
+
+ *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
+ if (iommu_dma_mapping_error(dev, *handle)) {
+ if (coherent)
+ __free_pages(page, get_order(size));
+ else
+ __free_from_pool(addr, size);
+ addr = NULL;
+ }
+ }
+ return addr;
+}
+
+static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t handle, struct dma_attrs *attrs)
+{
+ size_t iosize = size;
+
+ size = PAGE_ALIGN(size);
+ /*
+ * @cpu_addr will be one of 3 things depending on how it was allocated:
+ * - A remapped array of pages from iommu_dma_alloc(), for all
+ * non-atomic allocations.
+ * - A non-cacheable alias from the atomic pool, for atomic
+ * allocations by non-coherent devices.
+ * - A normal lowmem address, for atomic allocations by
+ * coherent devices.
+ * Hence how dodgy the below logic looks...
+ */
+ if (__in_atomic_pool(cpu_addr, size)) {
+ iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
+ __free_from_pool(cpu_addr, size);
+ } else if (is_vmalloc_addr(cpu_addr)){
+ struct vm_struct *area = find_vm_area(cpu_addr);
+
+ if (WARN_ON(!area || !area->pages))
+ return;
+ iommu_dma_free(dev, area->pages, iosize, &handle);
+ dma_common_free_remap(cpu_addr, size, VM_USERMAP);
+ } else {
+ iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
+ __free_pages(virt_to_page(cpu_addr), get_order(size));
+ }
+}
+
+static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ struct dma_attrs *attrs)
+{
+ struct vm_struct *area;
+ int ret;
+
+ vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
+ is_device_dma_coherent(dev));
+
+ if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+ return ret;
+
+ area = find_vm_area(cpu_addr);
+ if (WARN_ON(!area || !area->pages))
+ return -ENXIO;
+
+ return iommu_dma_mmap(area->pages, size, vma);
+}
+
+static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t dma_addr,
+ size_t size, struct dma_attrs *attrs)
+{
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ struct vm_struct *area = find_vm_area(cpu_addr);
+
+ if (WARN_ON(!area || !area->pages))
+ return -ENXIO;
+
+ return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
+ GFP_KERNEL);
+}
+
+static void __iommu_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir)
+{
+ phys_addr_t phys;
+
+ if (is_device_dma_coherent(dev))
+ return;
+
+ phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
+ __dma_unmap_area(phys_to_virt(phys), size, dir);
+}
+
+static void __iommu_sync_single_for_device(struct device *dev,
+ dma_addr_t dev_addr, size_t size,
+ enum dma_data_direction dir)
+{
+ phys_addr_t phys;
+
+ if (is_device_dma_coherent(dev))
+ return;
+
+ phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
+ __dma_map_area(phys_to_virt(phys), size, dir);
+}
+
+static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ bool coherent = is_device_dma_coherent(dev);
+ int prot = dma_direction_to_prot(dir, coherent);
+ dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
+
+ if (!iommu_dma_mapping_error(dev, dev_addr) &&
+ !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ __iommu_sync_single_for_device(dev, dev_addr, size, dir);
+
+ return dev_addr;
+}
+
+static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ __iommu_sync_single_for_cpu(dev, dev_addr, size, dir);
+
+ iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static void __iommu_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ if (is_device_dma_coherent(dev))
+ return;
+
+ for_each_sg(sgl, sg, nelems, i)
+ __dma_unmap_area(sg_virt(sg), sg->length, dir);
+}
+
+static void __iommu_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+
+ if (is_device_dma_coherent(dev))
+ return;
+
+ for_each_sg(sgl, sg, nelems, i)
+ __dma_map_area(sg_virt(sg), sg->length, dir);
+}
+
+static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+ int nelems, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ bool coherent = is_device_dma_coherent(dev);
+
+ if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ __iommu_sync_sg_for_device(dev, sgl, nelems, dir);
+
+ return iommu_dma_map_sg(dev, sgl, nelems,
+ dma_direction_to_prot(dir, coherent));
+}
+
+static void __iommu_unmap_sg_attrs(struct device *dev,
+ struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
+ __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);
+
+ iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
+}
+
+static struct dma_map_ops iommu_dma_ops = {
+ .alloc = __iommu_alloc_attrs,
+ .free = __iommu_free_attrs,
+ .mmap = __iommu_mmap_attrs,
+ .get_sgtable = __iommu_get_sgtable,
+ .map_page = __iommu_map_page,
+ .unmap_page = __iommu_unmap_page,
+ .map_sg = __iommu_map_sg_attrs,
+ .unmap_sg = __iommu_unmap_sg_attrs,
+ .sync_single_for_cpu = __iommu_sync_single_for_cpu,
+ .sync_single_for_device = __iommu_sync_single_for_device,
+ .sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
+ .sync_sg_for_device = __iommu_sync_sg_for_device,
+ .dma_supported = iommu_dma_supported,
+ .mapping_error = iommu_dma_mapping_error,
+};
+
+/*
+ * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
+ * everything it needs to - the device is only partially created and the
+ * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
+ * need this delayed attachment dance. Once IOMMU probe ordering is sorted
+ * to move the arch_setup_dma_ops() call later, all the notifier bits below
+ * become unnecessary, and will go away.
+ */
+struct iommu_dma_notifier_data {
+ struct list_head list;
+ struct device *dev;
+ const struct iommu_ops *ops;
+ u64 dma_base;
+ u64 size;
+};
+static LIST_HEAD(iommu_dma_masters);
+static DEFINE_MUTEX(iommu_dma_notifier_lock);
+
+/*
+ * Temporarily "borrow" a domain feature flag to to tell if we had to resort
+ * to creating our own domain here, in case we need to clean it up again.
+ */
+#define __IOMMU_DOMAIN_FAKE_DEFAULT (1U << 31)
+
+static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
+ u64 dma_base, u64 size)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+ /*
+ * Best case: The device is either part of a group which was
+ * already attached to a domain in a previous call, or it's
+ * been put in a default DMA domain by the IOMMU core.
+ */
+ if (!domain) {
+ /*
+ * Urgh. The IOMMU core isn't going to do default domains
+ * for non-PCI devices anyway, until it has some means of
+ * abstracting the entirely implementation-specific
+ * sideband data/SoC topology/unicorn dust that may or
+ * may not differentiate upstream masters.
+ * So until then, HORRIBLE HACKS!
+ */
+ domain = ops->domain_alloc(IOMMU_DOMAIN_DMA);
+ if (!domain)
+ goto out_no_domain;
+
+ domain->ops = ops;
+ domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT;
+
+ if (iommu_attach_device(domain, dev))
+ goto out_put_domain;
+ }
+
+ if (iommu_dma_init_domain(domain, dma_base, size))
+ goto out_detach;
+
+ dev->archdata.dma_ops = &iommu_dma_ops;
+ return true;
+
+out_detach:
+ iommu_detach_device(domain, dev);
+out_put_domain:
+ if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
+ iommu_domain_free(domain);
+out_no_domain:
+ pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
+ dev_name(dev));
+ return false;
+}
+
+static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
+ u64 dma_base, u64 size)
+{
+ struct iommu_dma_notifier_data *iommudata;
+
+ iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
+ if (!iommudata)
+ return;
+
+ iommudata->dev = dev;
+ iommudata->ops = ops;
+ iommudata->dma_base = dma_base;
+ iommudata->size = size;
+
+ mutex_lock(&iommu_dma_notifier_lock);
+ list_add(&iommudata->list, &iommu_dma_masters);
+ mutex_unlock(&iommu_dma_notifier_lock);
+}
+
+static int __iommu_attach_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct iommu_dma_notifier_data *master, *tmp;
+
+ if (action != BUS_NOTIFY_ADD_DEVICE)
+ return 0;
+
+ mutex_lock(&iommu_dma_notifier_lock);
+ list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
+ if (do_iommu_attach(master->dev, master->ops,
+ master->dma_base, master->size)) {
+ list_del(&master->list);
+ kfree(master);
+ }
+ }
+ mutex_unlock(&iommu_dma_notifier_lock);
+ return 0;
+}
+
+static int register_iommu_dma_ops_notifier(struct bus_type *bus)
+{
+ struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
+ int ret;
+
+ if (!nb)
+ return -ENOMEM;
+ /*
+ * The device must be attached to a domain before the driver probe
+ * routine gets a chance to start allocating DMA buffers. However,
+ * the IOMMU driver also needs a chance to configure the iommu_group
+ * via its add_device callback first, so we need to make the attach
+ * happen between those two points. Since the IOMMU core uses a bus
+ * notifier with default priority for add_device, do the same but
+ * with a lower priority to ensure the appropriate ordering.
+ */
+ nb->notifier_call = __iommu_attach_notifier;
+ nb->priority = -100;
+
+ ret = bus_register_notifier(bus, nb);
+ if (ret) {
+ pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
+ bus->name);
+ kfree(nb);
+ }
+ return ret;
+}
+
+static int __init __iommu_dma_init(void)
+{
+ int ret;
+
+ ret = iommu_dma_init();
+ if (!ret)
+ ret = register_iommu_dma_ops_notifier(&platform_bus_type);
+ if (!ret)
+ ret = register_iommu_dma_ops_notifier(&amba_bustype);
+
+ /* handle devices queued before this arch_initcall */
+ if (!ret)
+ __iommu_attach_notifier(NULL, BUS_NOTIFY_ADD_DEVICE, NULL);
+ return ret;
+}
+arch_initcall(__iommu_dma_init);
+
+static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+ const struct iommu_ops *ops)
+{
+ struct iommu_group *group;
+
+ if (!ops)
+ return;
+ /*
+ * TODO: As a concession to the future, we're ready to handle being
+ * called both early and late (i.e. after bus_add_device). Once all
+ * the platform bus code is reworked to call us late and the notifier
+ * junk above goes away, move the body of do_iommu_attach here.
+ */
+ group = iommu_group_get(dev);
+ if (group) {
+ do_iommu_attach(dev, ops, dma_base, size);
+ iommu_group_put(group);
+ } else {
+ queue_iommu_attach(dev, ops, dma_base, size);
+ }
+}
+
+void arch_teardown_dma_ops(struct device *dev)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+
+ if (domain) {
+ iommu_detach_device(domain, dev);
+ if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
+ iommu_domain_free(domain);
+ }
+
+ dev->archdata.dma_ops = NULL;
+}
+
+#else
+
+static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+ struct iommu_ops *iommu)
+{ }
+
+#endif /* CONFIG_IOMMU_DMA */
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+ struct iommu_ops *iommu, bool coherent)
+{
+ if (!dev->archdata.dma_ops)
+ dev->archdata.dma_ops = &swiotlb_dma_ops;
+
+ dev->archdata.dma_coherent = coherent;
+ __iommu_setup_dma_ops(dev, dma_base, size, iommu);
+}
diff --git a/kernel/arch/arm64/mm/dump.c b/kernel/arch/arm64/mm/dump.c
index f3d6221cd..5a22a119a 100644
--- a/kernel/arch/arm64/mm/dump.c
+++ b/kernel/arch/arm64/mm/dump.c
@@ -67,6 +67,12 @@ static struct addr_marker address_markers[] = {
{ -1, NULL },
};
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
struct pg_state {
struct seq_file *seq;
const struct addr_marker *marker;
@@ -114,6 +120,16 @@ static const struct prot_bits pte_bits[] = {
.set = "NG",
.clear = " ",
}, {
+ .mask = PTE_CONT,
+ .val = PTE_CONT,
+ .set = "CON",
+ .clear = " ",
+ }, {
+ .mask = PTE_TABLE_BIT,
+ .val = PTE_TABLE_BIT,
+ .set = " ",
+ .clear = "BLK",
+ }, {
.mask = PTE_UXN,
.val = PTE_UXN,
.set = "UXN",
@@ -198,7 +214,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
unsigned long delta;
if (st->current_prot) {
- seq_printf(st->seq, "0x%16lx-0x%16lx ",
+ seq_printf(st->seq, "0x%016lx-0x%016lx ",
st->start_address, addr);
delta = (addr - st->start_address) >> 10;
diff --git a/kernel/arch/arm64/mm/fault.c b/kernel/arch/arm64/mm/fault.c
index 0948d327d..92ddac1e8 100644
--- a/kernel/arch/arm64/mm/fault.c
+++ b/kernel/arch/arm64/mm/fault.c
@@ -30,9 +30,11 @@
#include <linux/highmem.h>
#include <linux/perf_event.h>
+#include <asm/cpufeature.h>
#include <asm/exception.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
+#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -115,8 +117,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
{
struct siginfo si;
- if (show_unhandled_signals && unhandled_signal(tsk, sig) &&
- printk_ratelimit()) {
+ if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
tsk->comm, task_pid_nr(tsk), fault_name(esr), sig,
addr, esr);
@@ -225,6 +226,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
}
/*
+ * PAN bit set implies the fault happened in kernel space, but not
+ * in the arch's user access functions.
+ */
+ if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
+ goto no_context;
+
+ /*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
* we can bug out early if this is from code which shouldn't.
@@ -279,6 +287,7 @@ retry:
* starvation.
*/
mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ mm_flags |= FAULT_FLAG_TRIED;
goto retry;
}
}
@@ -384,16 +393,16 @@ static struct fault_info {
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
{ do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
- { do_bad, SIGBUS, 0, "reserved access flag fault" },
+ { do_bad, SIGBUS, 0, "unknown 8" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" },
- { do_bad, SIGBUS, 0, "reserved permission fault" },
+ { do_bad, SIGBUS, 0, "unknown 12" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
{ do_bad, SIGBUS, 0, "synchronous external abort" },
- { do_bad, SIGBUS, 0, "asynchronous external abort" },
+ { do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
{ do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
@@ -401,16 +410,16 @@ static struct fault_info {
{ do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error" },
- { do_bad, SIGBUS, 0, "asynchronous parity error" },
+ { do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
{ do_bad, SIGBUS, 0, "unknown 27" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" },
+ { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "unknown 32" },
{ do_bad, SIGBUS, BUS_ADRALN, "alignment fault" },
- { do_bad, SIGBUS, 0, "debug event" },
+ { do_bad, SIGBUS, 0, "unknown 34" },
{ do_bad, SIGBUS, 0, "unknown 35" },
{ do_bad, SIGBUS, 0, "unknown 36" },
{ do_bad, SIGBUS, 0, "unknown 37" },
@@ -424,21 +433,21 @@ static struct fault_info {
{ do_bad, SIGBUS, 0, "unknown 45" },
{ do_bad, SIGBUS, 0, "unknown 46" },
{ do_bad, SIGBUS, 0, "unknown 47" },
- { do_bad, SIGBUS, 0, "unknown 48" },
+ { do_bad, SIGBUS, 0, "TLB conflict abort" },
{ do_bad, SIGBUS, 0, "unknown 49" },
{ do_bad, SIGBUS, 0, "unknown 50" },
{ do_bad, SIGBUS, 0, "unknown 51" },
{ do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" },
- { do_bad, SIGBUS, 0, "unknown 53" },
+ { do_bad, SIGBUS, 0, "implementation fault (unsupported exclusive)" },
{ do_bad, SIGBUS, 0, "unknown 54" },
{ do_bad, SIGBUS, 0, "unknown 55" },
{ do_bad, SIGBUS, 0, "unknown 56" },
{ do_bad, SIGBUS, 0, "unknown 57" },
- { do_bad, SIGBUS, 0, "implementation fault (coprocessor abort)" },
+ { do_bad, SIGBUS, 0, "unknown 58" },
{ do_bad, SIGBUS, 0, "unknown 59" },
{ do_bad, SIGBUS, 0, "unknown 60" },
- { do_bad, SIGBUS, 0, "unknown 61" },
- { do_bad, SIGBUS, 0, "unknown 62" },
+ { do_bad, SIGBUS, 0, "section domain fault" },
+ { do_bad, SIGBUS, 0, "page domain fault" },
{ do_bad, SIGBUS, 0, "unknown 63" },
};
@@ -478,22 +487,37 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
struct pt_regs *regs)
{
struct siginfo info;
+ struct task_struct *tsk = current;
+
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGBUS))
+ pr_info_ratelimited("%s[%d]: %s exception: pc=%p sp=%p\n",
+ tsk->comm, task_pid_nr(tsk),
+ esr_get_class_string(esr), (void *)regs->pc,
+ (void *)regs->sp);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_ADRALN;
info.si_addr = (void __user *)addr;
- arm64_notify_die("", regs, &info, esr);
+ arm64_notify_die("Oops - SP/PC alignment exception", regs, &info, esr);
}
-static struct fault_info debug_fault_info[] = {
+int __init early_brk64(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs);
+
+/*
+ * __refdata because early_brk64 is __init, but the reference to it is
+ * clobbered at arch_initcall time.
+ * See traps.c and debug-monitors.c:debug_traps_init().
+ */
+static struct fault_info __refdata debug_fault_info[] = {
{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware breakpoint" },
{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware single-step" },
{ do_bad, SIGTRAP, TRAP_HWBKPT, "hardware watchpoint" },
{ do_bad, SIGBUS, 0, "unknown 3" },
{ do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" },
{ do_bad, SIGTRAP, 0, "aarch32 vector catch" },
- { do_bad, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" },
+ { early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" },
{ do_bad, SIGBUS, 0, "unknown 7" },
};
@@ -530,3 +554,10 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
return 0;
}
+
+#ifdef CONFIG_ARM64_PAN
+void cpu_enable_pan(void *__unused)
+{
+ config_sctlr_el1(SCTLR_EL1_SPAN, 0);
+}
+#endif /* CONFIG_ARM64_PAN */
diff --git a/kernel/arch/arm64/mm/flush.c b/kernel/arch/arm64/mm/flush.c
index b6f14e8d2..c26b80401 100644
--- a/kernel/arch/arm64/mm/flush.c
+++ b/kernel/arch/arm64/mm/flush.c
@@ -60,14 +60,10 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
unsigned long uaddr, void *dst, const void *src,
unsigned long len)
{
-#ifdef CONFIG_SMP
preempt_disable();
-#endif
memcpy(dst, src, len);
flush_ptrace_access(vma, page, uaddr, dst, len);
-#ifdef CONFIG_SMP
preempt_enable();
-#endif
}
void __sync_icache_dcache(pte_t pte, unsigned long addr)
@@ -102,7 +98,6 @@ EXPORT_SYMBOL(flush_dcache_page);
/*
* Additional functions defined in assembly.
*/
-EXPORT_SYMBOL(flush_cache_all);
EXPORT_SYMBOL(flush_icache_range);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/kernel/arch/arm64/mm/hugetlbpage.c b/kernel/arch/arm64/mm/hugetlbpage.c
index 0eeb4f093..383b03ff3 100644
--- a/kernel/arch/arm64/mm/hugetlbpage.c
+++ b/kernel/arch/arm64/mm/hugetlbpage.c
@@ -13,10 +13,6 @@
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/init.h>
@@ -31,13 +27,6 @@
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
-#ifndef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
- return 0;
-}
-#endif
-
int pmd_huge(pmd_t pmd)
{
return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
diff --git a/kernel/arch/arm64/mm/init.c b/kernel/arch/arm64/mm/init.c
index ad87ce826..4cb98aa8c 100644
--- a/kernel/arch/arm64/mm/init.c
+++ b/kernel/arch/arm64/mm/init.c
@@ -86,10 +86,10 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
memset(zone_size, 0, sizeof(zone_size));
/* 4GB maximum for 32-bit only capable devices */
- if (IS_ENABLED(CONFIG_ZONE_DMA)) {
- max_dma = PFN_DOWN(arm64_dma_phys_limit);
- zone_size[ZONE_DMA] = max_dma - min;
- }
+#ifdef CONFIG_ZONE_DMA
+ max_dma = PFN_DOWN(arm64_dma_phys_limit);
+ zone_size[ZONE_DMA] = max_dma - min;
+#endif
zone_size[ZONE_NORMAL] = max - max_dma;
memcpy(zhole_size, zone_size, sizeof(zhole_size));
@@ -101,11 +101,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (start >= max)
continue;
- if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+#ifdef CONFIG_ZONE_DMA
+ if (start < max_dma) {
unsigned long dma_end = min(end, max_dma);
zhole_size[ZONE_DMA] -= dma_end - start;
}
-
+#endif
if (end > max_dma) {
unsigned long normal_end = min(end, max);
unsigned long normal_start = max(start, max_dma);
@@ -298,6 +299,9 @@ void __init mem_init(void)
#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
pr_notice("Virtual kernel memory layout:\n"
+#ifdef CONFIG_KASAN
+ " kasan : 0x%16lx - 0x%16lx (%6ld GB)\n"
+#endif
" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n"
#ifdef CONFIG_SPARSEMEM_VMEMMAP
" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n"
@@ -310,10 +314,13 @@ void __init mem_init(void)
" .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
+#ifdef CONFIG_KASAN
+ MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
+#endif
MLG(VMALLOC_START, VMALLOC_END),
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- MLG((unsigned long)vmemmap,
- (unsigned long)vmemmap + VMEMMAP_SIZE),
+ MLG(VMEMMAP_START,
+ VMEMMAP_START + VMEMMAP_SIZE),
MLM((unsigned long)virt_to_page(PAGE_OFFSET),
(unsigned long)virt_to_page(high_memory)),
#endif
@@ -358,9 +365,9 @@ void free_initmem(void)
#ifdef CONFIG_BLK_DEV_INITRD
-static int keep_initrd;
+static int keep_initrd __initdata;
-void free_initrd_mem(unsigned long start, unsigned long end)
+void __init free_initrd_mem(unsigned long start, unsigned long end)
{
if (!keep_initrd)
free_reserved_area((void *)start, (void *)end, 0, "initrd");
diff --git a/kernel/arch/arm64/mm/kasan_init.c b/kernel/arch/arm64/mm/kasan_init.c
new file mode 100644
index 000000000..cf038c7d9
--- /dev/null
+++ b/kernel/arch/arm64/mm/kasan_init.c
@@ -0,0 +1,165 @@
+/*
+ * This file contains kasan initialization code for ARM64.
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/start_kernel.h>
+
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+
+static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
+ unsigned long end)
+{
+ pte_t *pte;
+ unsigned long next;
+
+ if (pmd_none(*pmd))
+ pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+
+ pte = pte_offset_kernel(pmd, addr);
+ do {
+ next = addr + PAGE_SIZE;
+ set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
+ PAGE_KERNEL));
+ } while (pte++, addr = next, addr != end && pte_none(*pte));
+}
+
+static void __init kasan_early_pmd_populate(pud_t *pud,
+ unsigned long addr,
+ unsigned long end)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ if (pud_none(*pud))
+ pud_populate(&init_mm, pud, kasan_zero_pmd);
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+ kasan_early_pte_populate(pmd, addr, next);
+ } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+}
+
+static void __init kasan_early_pud_populate(pgd_t *pgd,
+ unsigned long addr,
+ unsigned long end)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ if (pgd_none(*pgd))
+ pgd_populate(&init_mm, pgd, kasan_zero_pud);
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ kasan_early_pmd_populate(pud, addr, next);
+ } while (pud++, addr = next, addr != end && pud_none(*pud));
+}
+
+static void __init kasan_map_early_shadow(void)
+{
+ unsigned long addr = KASAN_SHADOW_START;
+ unsigned long end = KASAN_SHADOW_END;
+ unsigned long next;
+ pgd_t *pgd;
+
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ kasan_early_pud_populate(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+ kasan_map_early_shadow();
+}
+
+static void __init clear_pgds(unsigned long start,
+ unsigned long end)
+{
+ /*
+ * Remove references to kasan page tables from
+ * swapper_pg_dir. pgd_clear() can't be used
+ * here because it's nop on 2,3-level pagetable setups
+ */
+ for (; start < end; start += PGDIR_SIZE)
+ set_pgd(pgd_offset_k(start), __pgd(0));
+}
+
+static void __init cpu_set_ttbr1(unsigned long ttbr1)
+{
+ asm(
+ " msr ttbr1_el1, %0\n"
+ " isb"
+ :
+ : "r" (ttbr1));
+}
+
+void __init kasan_init(void)
+{
+ struct memblock_region *reg;
+
+ /*
+ * We are going to perform proper setup of shadow memory.
+ * At first we should unmap early shadow (clear_pgds() call bellow).
+ * However, instrumented code couldn't execute without shadow memory.
+ * tmp_pg_dir used to keep early shadow mapped until full shadow
+ * setup will be finished.
+ */
+ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+ cpu_set_ttbr1(__pa(tmp_pg_dir));
+ flush_tlb_all();
+
+ clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+ kasan_mem_to_shadow((void *)MODULES_VADDR));
+
+ for_each_memblock(memory, reg) {
+ void *start = (void *)__phys_to_virt(reg->base);
+ void *end = (void *)__phys_to_virt(reg->base + reg->size);
+
+ if (start >= end)
+ break;
+
+ /*
+ * end + 1 here is intentional. We check several shadow bytes in
+ * advance to slightly speed up fastpath. In some rare cases
+ * we could cross boundary of mapped shadow, so we just map
+ * some more here.
+ */
+ vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
+ (unsigned long)kasan_mem_to_shadow(end) + 1,
+ pfn_to_nid(virt_to_pfn(start)));
+ }
+
+ memset(kasan_zero_page, 0, PAGE_SIZE);
+ cpu_set_ttbr1(__pa(swapper_pg_dir));
+ flush_tlb_all();
+
+ /* At this point kasan is fully initialized. Enable error messages */
+ init_task.kasan_depth = 0;
+ pr_info("KernelAddressSanitizer initialized\n");
+}
diff --git a/kernel/arch/arm64/mm/mmu.c b/kernel/arch/arm64/mm/mmu.c
index 5b8b66442..116ad654d 100644
--- a/kernel/arch/arm64/mm/mmu.c
+++ b/kernel/arch/arm64/mm/mmu.c
@@ -21,6 +21,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
+#include <linux/libfdt.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/memblock.h>
@@ -31,6 +32,7 @@
#include <asm/cputype.h>
#include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
@@ -62,8 +64,12 @@ EXPORT_SYMBOL(phys_mem_access_prot);
static void __init *early_alloc(unsigned long sz)
{
- void *ptr = __va(memblock_alloc(sz, sz));
- BUG_ON(!ptr);
+ phys_addr_t phys;
+ void *ptr;
+
+ phys = memblock_alloc(sz, sz);
+ BUG_ON(!phys);
+ ptr = __va(phys);
memset(ptr, 0, sz);
return ptr;
}
@@ -109,14 +115,14 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
} while (pte++, addr += PAGE_SIZE, addr != end);
}
-void split_pud(pud_t *old_pud, pmd_t *pmd)
+static void split_pud(pud_t *old_pud, pmd_t *pmd)
{
unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT;
pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);
int i = 0;
do {
- set_pmd(pmd, __pmd(addr | prot));
+ set_pmd(pmd, __pmd(addr | pgprot_val(prot)));
addr += PMD_SIZE;
} while (pmd++, i++, i < PTRS_PER_PMD);
}
@@ -266,7 +272,7 @@ static void *late_alloc(unsigned long size)
return ptr;
}
-static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
+static void __init create_mapping(phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot)
{
if (virt < VMALLOC_START) {
@@ -307,8 +313,8 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
* for now. This will get more fine grained later once all memory
* is mapped
*/
- unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
- unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+ unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
+ unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
if (end < kernel_x_start) {
create_mapping(start, __phys_to_virt(start),
@@ -352,14 +358,11 @@ static void __init map_mem(void)
* memory addressable from the initial direct kernel mapping.
*
* The initial direct kernel mapping, located at swapper_pg_dir, gives
- * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
- * PHYS_OFFSET (which must be aligned to 2MB as per
- * Documentation/arm64/booting.txt).
+ * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
+ * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
+ * per Documentation/arm64/booting.txt).
*/
- if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
- limit = PHYS_OFFSET + PMD_SIZE;
- else
- limit = PHYS_OFFSET + PUD_SIZE;
+ limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
memblock_set_current_limit(limit);
/* map all the memory banks */
@@ -370,21 +373,24 @@ static void __init map_mem(void)
if (start >= end)
break;
-#ifndef CONFIG_ARM64_64K_PAGES
- /*
- * For the first memory bank align the start address and
- * current memblock limit to prevent create_mapping() from
- * allocating pte page tables from unmapped memory.
- * When 64K pages are enabled, the pte page table for the
- * first PGDIR_SIZE is already present in swapper_pg_dir.
- */
- if (start < limit)
- start = ALIGN(start, PMD_SIZE);
- if (end < limit) {
- limit = end & PMD_MASK;
- memblock_set_current_limit(limit);
+ if (ARM64_SWAPPER_USES_SECTION_MAPS) {
+ /*
+ * For the first memory bank align the start address and
+ * current memblock limit to prevent create_mapping() from
+ * allocating pte page tables from unmapped memory. With
+ * the section maps, if the first block doesn't end on section
+ * size boundary, create_mapping() will try to allocate a pte
+ * page, which may be returned from an unmapped area.
+ * When section maps are not used, the pte page table for the
+ * current limit is already present in swapper_pg_dir.
+ */
+ if (start < limit)
+ start = ALIGN(start, SECTION_SIZE);
+ if (end < limit) {
+ limit = end & SECTION_MASK;
+ memblock_set_current_limit(limit);
+ }
}
-#endif
__map_memblock(start, end);
}
@@ -392,22 +398,22 @@ static void __init map_mem(void)
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
}
-void __init fixup_executable(void)
+static void __init fixup_executable(void)
{
#ifdef CONFIG_DEBUG_RODATA
/* now that we are actually fully mapped, make the start/end more fine grained */
- if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
+ if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
unsigned long aligned_start = round_down(__pa(_stext),
- SECTION_SIZE);
+ SWAPPER_BLOCK_SIZE);
create_mapping(aligned_start, __phys_to_virt(aligned_start),
__pa(_stext) - aligned_start,
PAGE_KERNEL);
}
- if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
+ if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
unsigned long aligned_end = round_up(__pa(__init_end),
- SECTION_SIZE);
+ SWAPPER_BLOCK_SIZE);
create_mapping(__pa(__init_end), (unsigned long)__init_end,
aligned_end - __pa(__init_end),
PAGE_KERNEL);
@@ -420,7 +426,7 @@ void mark_rodata_ro(void)
{
create_mapping_late(__pa(_stext), (unsigned long)_stext,
(unsigned long)_etext - (unsigned long)_stext,
- PAGE_KERNEL_EXEC | PTE_RDONLY);
+ PAGE_KERNEL_ROX);
}
#endif
@@ -450,27 +456,19 @@ void __init paging_init(void)
empty_zero_page = virt_to_page(zero_page);
+ /* Ensure the zero page is visible to the page table walker */
+ dsb(ishst);
+
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
*/
cpu_set_reserved_ttbr0();
- flush_tlb_all();
+ local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
}
/*
- * Enable the identity mapping to allow the MMU disabling.
- */
-void setup_mm_for_reboot(void)
-{
- cpu_set_reserved_ttbr0();
- flush_tlb_all();
- cpu_set_idmap_tcr_t0sz();
- cpu_switch_mm(idmap_pg_dir, &init_mm);
-}
-
-/*
* Check whether a kernel address is valid (derived from arch/x86/).
*/
int kern_addr_valid(unsigned long addr)
@@ -508,12 +506,12 @@ int kern_addr_valid(unsigned long addr)
return pfn_valid(pte_pfn(*pte));
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
-#ifdef CONFIG_ARM64_64K_PAGES
+#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
return vmemmap_populate_basepages(start, end, node);
}
-#else /* !CONFIG_ARM64_64K_PAGES */
+#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
unsigned long addr = start;
@@ -643,3 +641,59 @@ void __set_fixmap(enum fixed_addresses idx,
flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
}
}
+
+void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+{
+ const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
+ pgprot_t prot = PAGE_KERNEL_RO;
+ int size, offset;
+ void *dt_virt;
+
+ /*
+ * Check whether the physical FDT address is set and meets the minimum
+ * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
+ * at least 8 bytes so that we can always access the size field of the
+ * FDT header after mapping the first chunk, double check here if that
+ * is indeed the case.
+ */
+ BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
+ if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
+ return NULL;
+
+ /*
+ * Make sure that the FDT region can be mapped without the need to
+ * allocate additional translation table pages, so that it is safe
+ * to call create_mapping() this early.
+ *
+ * On 64k pages, the FDT will be mapped using PTEs, so we need to
+ * be in the same PMD as the rest of the fixmap.
+ * On 4k pages, we'll use section mappings for the FDT so we only
+ * have to be in the same PUD.
+ */
+ BUILD_BUG_ON(dt_virt_base % SZ_2M);
+
+ BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
+ __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);
+
+ offset = dt_phys % SWAPPER_BLOCK_SIZE;
+ dt_virt = (void *)dt_virt_base + offset;
+
+ /* map the first chunk so we can read the size from the header */
+ create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+ SWAPPER_BLOCK_SIZE, prot);
+
+ if (fdt_check_header(dt_virt) != 0)
+ return NULL;
+
+ size = fdt_totalsize(dt_virt);
+ if (size > MAX_FDT_SIZE)
+ return NULL;
+
+ if (offset + size > SWAPPER_BLOCK_SIZE)
+ create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+ round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
+
+ memblock_reserve(dt_phys, size);
+
+ return dt_virt;
+}
diff --git a/kernel/arch/arm64/mm/pageattr.c b/kernel/arch/arm64/mm/pageattr.c
index e47ed1c5d..cf6240741 100644
--- a/kernel/arch/arm64/mm/pageattr.c
+++ b/kernel/arch/arm64/mm/pageattr.c
@@ -45,7 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
int ret;
struct page_change_data data;
- if (!IS_ALIGNED(addr, PAGE_SIZE)) {
+ if (!PAGE_ALIGNED(addr)) {
start &= PAGE_MASK;
end = start + size;
WARN_ON_ONCE(1);
@@ -57,6 +57,9 @@ static int change_memory_common(unsigned long addr, int numpages,
if (end < MODULES_VADDR || end >= MODULES_END)
return -EINVAL;
+ if (!numpages)
+ return 0;
+
data.set_mask = set_mask;
data.clear_mask = clear_mask;
diff --git a/kernel/arch/arm64/mm/pgd.c b/kernel/arch/arm64/mm/pgd.c
index 71ca104f9..cb3ba1b81 100644
--- a/kernel/arch/arm64/mm/pgd.c
+++ b/kernel/arch/arm64/mm/pgd.c
@@ -28,8 +28,6 @@
#include "mm.h"
-#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
-
static struct kmem_cache *pgd_cache;
pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/kernel/arch/arm64/mm/proc-macros.S b/kernel/arch/arm64/mm/proc-macros.S
index 4c4d93c4b..d69dffffa 100644
--- a/kernel/arch/arm64/mm/proc-macros.S
+++ b/kernel/arch/arm64/mm/proc-macros.S
@@ -62,3 +62,15 @@
bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
#endif
.endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+ .macro reset_pmuserenr_el0, tmpreg
+ mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
+ sbfx \tmpreg, \tmpreg, #8, #4
+ cmp \tmpreg, #1 // Skip if no PMU present
+ b.lt 9000f
+ msr pmuserenr_el0, xzr // Disable PMU access from EL0
+9000:
+ .endm
diff --git a/kernel/arch/arm64/mm/proc.S b/kernel/arch/arm64/mm/proc.S
index cdd754e19..b8f04b3f2 100644
--- a/kernel/arch/arm64/mm/proc.S
+++ b/kernel/arch/arm64/mm/proc.S
@@ -30,15 +30,13 @@
#ifdef CONFIG_ARM64_64K_PAGES
#define TCR_TG_FLAGS TCR_TG0_64K | TCR_TG1_64K
-#else
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define TCR_TG_FLAGS TCR_TG0_16K | TCR_TG1_16K
+#else /* CONFIG_ARM64_4K_PAGES */
#define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K
#endif
-#ifdef CONFIG_SMP
#define TCR_SMP_FLAGS TCR_SHARED
-#else
-#define TCR_SMP_FLAGS 0
-#endif
/* PTWs cacheable, inner/outer WBWA */
#define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA
@@ -46,52 +44,6 @@
#define MAIR(attr, mt) ((attr) << ((mt) * 8))
/*
- * cpu_cache_off()
- *
- * Turn the CPU D-cache off.
- */
-ENTRY(cpu_cache_off)
- mrs x0, sctlr_el1
- bic x0, x0, #1 << 2 // clear SCTLR.C
- msr sctlr_el1, x0
- isb
- ret
-ENDPROC(cpu_cache_off)
-
-/*
- * cpu_reset(loc)
- *
- * Perform a soft reset of the system. Put the CPU into the same state
- * as it would be if it had been reset, and branch to what would be the
- * reset vector. It must be executed with the flat identity mapping.
- *
- * - loc - location to jump to for soft reset
- */
- .align 5
-ENTRY(cpu_reset)
- mrs x1, sctlr_el1
- bic x1, x1, #1
- msr sctlr_el1, x1 // disable the MMU
- isb
- ret x0
-ENDPROC(cpu_reset)
-
-ENTRY(cpu_soft_restart)
- /* Save address of cpu_reset() and reset address */
- mov x19, x0
- mov x20, x1
-
- /* Turn D-cache off */
- bl cpu_cache_off
-
- /* Push out all dirty data, and ensure cache is empty */
- bl flush_cache_all
-
- mov x0, x20
- ret x19
-ENDPROC(cpu_soft_restart)
-
-/*
* cpu_do_idle()
*
* Idle the processor (wait for interrupt).
@@ -165,6 +117,7 @@ ENTRY(cpu_do_resume)
*/
ubfx x11, x11, #1, #1
msr oslar_el1, x11
+ reset_pmuserenr_el0 x0 // Disable PMU access from EL0
mov x0, x12
dsb nsh // Make sure local tlb invalidation completed
isb
@@ -180,7 +133,7 @@ ENDPROC(cpu_do_resume)
* - pgd_phys - physical address of new TTB
*/
ENTRY(cpu_do_switch_mm)
- mmid w1, x1 // get mm->context.id
+ mmid x1, x1 // get mm->context.id
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
isb
@@ -196,13 +149,14 @@ ENDPROC(cpu_do_switch_mm)
* value of the SCTLR_EL1 register.
*/
ENTRY(__cpu_setup)
- ic iallu // I+BTB cache invalidate
- tlbi vmalle1is // invalidate I + D TLBs
- dsb ish
+ tlbi vmalle1 // Invalidate local TLB
+ dsb nsh
mov x0, #3 << 20
msr cpacr_el1, x0 // Enable FP/ASIMD
- msr mdscr_el1, xzr // Reset mdscr_el1
+ mov x0, #1 << 12 // Reset mdscr_el1 and disable
+ msr mdscr_el1, x0 // access to the DCC from EL0
+ reset_pmuserenr_el0 x0 // Disable PMU access from EL0
/*
* Memory region attributes for LPAE:
*
@@ -213,12 +167,14 @@ ENTRY(__cpu_setup)
* DEVICE_GRE 010 00001100
* NORMAL_NC 011 01000100
* NORMAL 100 11111111
+ * NORMAL_WT 101 10111011
*/
ldr x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \
MAIR(0x04, MT_DEVICE_nGnRE) | \
MAIR(0x0c, MT_DEVICE_GRE) | \
MAIR(0x44, MT_NORMAL_NC) | \
- MAIR(0xff, MT_NORMAL)
+ MAIR(0xff, MT_NORMAL) | \
+ MAIR(0xbb, MT_NORMAL_WT)
msr mair_el1, x5
/*
* Prepare SCTLR
@@ -242,6 +198,19 @@ ENTRY(__cpu_setup)
*/
mrs x9, ID_AA64MMFR0_EL1
bfi x10, x9, #32, #3
+#ifdef CONFIG_ARM64_HW_AFDBM
+ /*
+ * Hardware update of the Access and Dirty bits.
+ */
+ mrs x9, ID_AA64MMFR1_EL1
+ and x9, x9, #0xf
+ cbz x9, 2f
+ cmp x9, #2
+ b.lt 1f
+ orr x10, x10, #TCR_HD // hardware Dirty flag update
+1: orr x10, x10, #TCR_HA // hardware Access flag update
+2:
+#endif /* CONFIG_ARM64_HW_AFDBM */
msr tcr_el1, x10
ret // return to head.S
ENDPROC(__cpu_setup)