From 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 Mon Sep 17 00:00:00 2001 From: Yunhong Jiang Date: Tue, 4 Aug 2015 12:17:53 -0700 Subject: Add the rt linux 4.1.3-rt3 as base Import the rt linux 4.1.3-rt3 as OPNFV kvm base. It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and the base is: commit 0917f823c59692d751951bf5ea699a2d1e2f26a2 Author: Sebastian Andrzej Siewior Date: Sat Jul 25 12:13:34 2015 +0200 Prepare v4.1.3-rt3 Signed-off-by: Sebastian Andrzej Siewior We lose all the git history this way and it's not good. We should apply another opnfv project repo in future. Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423 Signed-off-by: Yunhong Jiang --- kernel/arch/x86/power/Makefile | 7 + kernel/arch/x86/power/cpu.c | 326 +++++++++++++++++++++++++++++++ kernel/arch/x86/power/hibernate_32.c | 172 ++++++++++++++++ kernel/arch/x86/power/hibernate_64.c | 147 ++++++++++++++ kernel/arch/x86/power/hibernate_asm_32.S | 84 ++++++++ kernel/arch/x86/power/hibernate_asm_64.S | 150 ++++++++++++++ 6 files changed, 886 insertions(+) create mode 100644 kernel/arch/x86/power/Makefile create mode 100644 kernel/arch/x86/power/cpu.c create mode 100644 kernel/arch/x86/power/hibernate_32.c create mode 100644 kernel/arch/x86/power/hibernate_64.c create mode 100644 kernel/arch/x86/power/hibernate_asm_32.S create mode 100644 kernel/arch/x86/power/hibernate_asm_64.S (limited to 'kernel/arch/x86/power') diff --git a/kernel/arch/x86/power/Makefile b/kernel/arch/x86/power/Makefile new file mode 100644 index 000000000..a6a198c33 --- /dev/null +++ b/kernel/arch/x86/power/Makefile @@ -0,0 +1,7 @@ +# __restore_processor_state() restores %gs after S3 resume and so should not +# itself be stack-protected +nostackp := $(call cc-option, -fno-stack-protector) +CFLAGS_cpu.o := $(nostackp) + +obj-$(CONFIG_PM_SLEEP) += cpu.o +obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o diff --git a/kernel/arch/x86/power/cpu.c b/kernel/arch/x86/power/cpu.c new file mode 100644 index 000000000..757678fb2 --- /dev/null +++ b/kernel/arch/x86/power/cpu.c @@ -0,0 +1,326 @@ +/* + * Suspend support specific for i386/x86-64. + * + * Distribute under GPLv2 + * + * Copyright (c) 2007 Rafael J. Wysocki + * Copyright (c) 2002 Pavel Machek + * Copyright (c) 2001 Patrick Mochel + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* pcntxt_mask */ +#include + +#ifdef CONFIG_X86_32 +__visible unsigned long saved_context_ebx; +__visible unsigned long saved_context_esp, saved_context_ebp; +__visible unsigned long saved_context_esi, saved_context_edi; +__visible unsigned long saved_context_eflags; +#endif +struct saved_context saved_context; + +/** + * __save_processor_state - save CPU registers before creating a + * hibernation image and before restoring the memory state from it + * @ctxt - structure to store the registers contents in + * + * NOTE: If there is a CPU register the modification of which by the + * boot kernel (ie. the kernel used for loading the hibernation image) + * might affect the operations of the restored target kernel (ie. the one + * saved in the hibernation image), then its contents must be saved by this + * function. In other words, if kernel A is hibernated and different + * kernel B is used for loading the hibernation image into memory, the + * kernel A's __save_processor_state() function must save all registers + * needed by kernel A, so that it can operate correctly after the resume + * regardless of what kernel B does in the meantime. + */ +static void __save_processor_state(struct saved_context *ctxt) +{ +#ifdef CONFIG_X86_32 + mtrr_save_fixed_ranges(NULL); +#endif + kernel_fpu_begin(); + + /* + * descriptor tables + */ +#ifdef CONFIG_X86_32 + store_idt(&ctxt->idt); +#else +/* CONFIG_X86_64 */ + store_idt((struct desc_ptr *)&ctxt->idt_limit); +#endif + /* + * We save it here, but restore it only in the hibernate case. + * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit + * mode in "secondary_startup_64". In 32-bit mode it is done via + * 'pmode_gdt' in wakeup_start. + */ + ctxt->gdt_desc.size = GDT_SIZE - 1; + ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); + + store_tr(ctxt->tr); + + /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ + /* + * segment registers + */ +#ifdef CONFIG_X86_32 + savesegment(es, ctxt->es); + savesegment(fs, ctxt->fs); + savesegment(gs, ctxt->gs); + savesegment(ss, ctxt->ss); +#else +/* CONFIG_X86_64 */ + asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); + asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); + asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); + asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs)); + asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss)); + + rdmsrl(MSR_FS_BASE, ctxt->fs_base); + rdmsrl(MSR_GS_BASE, ctxt->gs_base); + rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); + mtrr_save_fixed_ranges(NULL); + + rdmsrl(MSR_EFER, ctxt->efer); +#endif + + /* + * control registers + */ + ctxt->cr0 = read_cr0(); + ctxt->cr2 = read_cr2(); + ctxt->cr3 = read_cr3(); + ctxt->cr4 = __read_cr4_safe(); +#ifdef CONFIG_X86_64 + ctxt->cr8 = read_cr8(); +#endif + ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE, + &ctxt->misc_enable); +} + +/* Needed by apm.c */ +void save_processor_state(void) +{ + __save_processor_state(&saved_context); + x86_platform.save_sched_clock_state(); +} +#ifdef CONFIG_X86_32 +EXPORT_SYMBOL(save_processor_state); +#endif + +static void do_fpu_end(void) +{ + /* + * Restore FPU regs if necessary. + */ + kernel_fpu_end(); +} + +static void fix_processor_context(void) +{ + int cpu = smp_processor_id(); + struct tss_struct *t = &per_cpu(cpu_tss, cpu); +#ifdef CONFIG_X86_64 + struct desc_struct *desc = get_cpu_gdt_table(cpu); + tss_desc tss; +#endif + set_tss_desc(cpu, t); /* + * This just modifies memory; should not be + * necessary. But... This is necessary, because + * 386 hardware has concept of busy TSS or some + * similar stupidity. + */ + +#ifdef CONFIG_X86_64 + memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); + tss.type = 0x9; /* The available 64-bit TSS (see AMD vol 2, pg 91 */ + write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS); + + syscall_init(); /* This sets MSR_*STAR and related */ +#endif + load_TR_desc(); /* This does ltr */ + load_LDT(¤t->active_mm->context); /* This does lldt */ +} + +/** + * __restore_processor_state - restore the contents of CPU registers saved + * by __save_processor_state() + * @ctxt - structure to load the registers contents from + */ +static void notrace __restore_processor_state(struct saved_context *ctxt) +{ + if (ctxt->misc_enable_saved) + wrmsrl(MSR_IA32_MISC_ENABLE, ctxt->misc_enable); + /* + * control registers + */ + /* cr4 was introduced in the Pentium CPU */ +#ifdef CONFIG_X86_32 + if (ctxt->cr4) + __write_cr4(ctxt->cr4); +#else +/* CONFIG X86_64 */ + wrmsrl(MSR_EFER, ctxt->efer); + write_cr8(ctxt->cr8); + __write_cr4(ctxt->cr4); +#endif + write_cr3(ctxt->cr3); + write_cr2(ctxt->cr2); + write_cr0(ctxt->cr0); + + /* + * now restore the descriptor tables to their proper values + * ltr is done i fix_processor_context(). + */ +#ifdef CONFIG_X86_32 + load_idt(&ctxt->idt); +#else +/* CONFIG_X86_64 */ + load_idt((const struct desc_ptr *)&ctxt->idt_limit); +#endif + + /* + * segment registers + */ +#ifdef CONFIG_X86_32 + loadsegment(es, ctxt->es); + loadsegment(fs, ctxt->fs); + loadsegment(gs, ctxt->gs); + loadsegment(ss, ctxt->ss); + + /* + * sysenter MSRs + */ + if (boot_cpu_has(X86_FEATURE_SEP)) + enable_sep_cpu(); +#else +/* CONFIG_X86_64 */ + asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); + asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); + asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); + load_gs_index(ctxt->gs); + asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); + + wrmsrl(MSR_FS_BASE, ctxt->fs_base); + wrmsrl(MSR_GS_BASE, ctxt->gs_base); + wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); +#endif + + /* + * restore XCR0 for xsave capable cpu's. + */ + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); + + fix_processor_context(); + + do_fpu_end(); + x86_platform.restore_sched_clock_state(); + mtrr_bp_restore(); + perf_restore_debug_store(); +} + +/* Needed by apm.c */ +void notrace restore_processor_state(void) +{ + __restore_processor_state(&saved_context); +} +#ifdef CONFIG_X86_32 +EXPORT_SYMBOL(restore_processor_state); +#endif + +/* + * When bsp_check() is called in hibernate and suspend, cpu hotplug + * is disabled already. So it's unnessary to handle race condition between + * cpumask query and cpu hotplug. + */ +static int bsp_check(void) +{ + if (cpumask_first(cpu_online_mask) != 0) { + pr_warn("CPU0 is offline.\n"); + return -ENODEV; + } + + return 0; +} + +static int bsp_pm_callback(struct notifier_block *nb, unsigned long action, + void *ptr) +{ + int ret = 0; + + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + ret = bsp_check(); + break; +#ifdef CONFIG_DEBUG_HOTPLUG_CPU0 + case PM_RESTORE_PREPARE: + /* + * When system resumes from hibernation, online CPU0 because + * 1. it's required for resume and + * 2. the CPU was online before hibernation + */ + if (!cpu_online(0)) + _debug_hotplug_cpu(0, 1); + break; + case PM_POST_RESTORE: + /* + * When a resume really happens, this code won't be called. + * + * This code is called only when user space hibernation software + * prepares for snapshot device during boot time. So we just + * call _debug_hotplug_cpu() to restore to CPU0's state prior to + * preparing the snapshot device. + * + * This works for normal boot case in our CPU0 hotplug debug + * mode, i.e. CPU0 is offline and user mode hibernation + * software initializes during boot time. + * + * If CPU0 is online and user application accesses snapshot + * device after boot time, this will offline CPU0 and user may + * see different CPU0 state before and after accessing + * the snapshot device. But hopefully this is not a case when + * user debugging CPU0 hotplug. Even if users hit this case, + * they can easily online CPU0 back. + * + * To simplify this debug code, we only consider normal boot + * case. Otherwise we need to remember CPU0's state and restore + * to that state and resolve racy conditions etc. + */ + _debug_hotplug_cpu(0, 0); + break; +#endif + default: + break; + } + return notifier_from_errno(ret); +} + +static int __init bsp_pm_check_init(void) +{ + /* + * Set this bsp_pm_callback as lower priority than + * cpu_hotplug_pm_callback. So cpu_hotplug_pm_callback will be called + * earlier to disable cpu hotplug before bsp online check. + */ + pm_notifier(bsp_pm_callback, -INT_MAX); + return 0; +} + +core_initcall(bsp_pm_check_init); diff --git a/kernel/arch/x86/power/hibernate_32.c b/kernel/arch/x86/power/hibernate_32.c new file mode 100644 index 000000000..291226b95 --- /dev/null +++ b/kernel/arch/x86/power/hibernate_32.c @@ -0,0 +1,172 @@ +/* + * Hibernation support specific for i386 - temporary page tables + * + * Distribute under GPLv2 + * + * Copyright (c) 2006 Rafael J. Wysocki + */ + +#include +#include +#include + +#include +#include +#include +#include + +/* Defined in hibernate_asm_32.S */ +extern int restore_image(void); + +/* Pointer to the temporary resume page tables */ +pgd_t *resume_pg_dir; + +/* The following three functions are based on the analogous code in + * arch/x86/mm/init_32.c + */ + +/* + * Create a middle page table on a resume-safe page and put a pointer to it in + * the given global directory entry. This only returns the gd entry + * in non-PAE compilation mode, since the middle layer is folded. + */ +static pmd_t *resume_one_md_table_init(pgd_t *pgd) +{ + pud_t *pud; + pmd_t *pmd_table; + +#ifdef CONFIG_X86_PAE + pmd_table = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd_table) + return NULL; + + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); + pud = pud_offset(pgd, 0); + + BUG_ON(pmd_table != pmd_offset(pud, 0)); +#else + pud = pud_offset(pgd, 0); + pmd_table = pmd_offset(pud, 0); +#endif + + return pmd_table; +} + +/* + * Create a page table on a resume-safe page and place a pointer to it in + * a middle page directory entry. + */ +static pte_t *resume_one_page_table_init(pmd_t *pmd) +{ + if (pmd_none(*pmd)) { + pte_t *page_table = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!page_table) + return NULL; + + set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); + + BUG_ON(page_table != pte_offset_kernel(pmd, 0)); + + return page_table; + } + + return pte_offset_kernel(pmd, 0); +} + +/* + * This maps the physical memory to kernel virtual address space, a total + * of max_low_pfn pages, by creating page tables starting from address + * PAGE_OFFSET. The page tables are allocated out of resume-safe pages. + */ +static int resume_physical_mapping_init(pgd_t *pgd_base) +{ + unsigned long pfn; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int pgd_idx, pmd_idx; + + pgd_idx = pgd_index(PAGE_OFFSET); + pgd = pgd_base + pgd_idx; + pfn = 0; + + for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) { + pmd = resume_one_md_table_init(pgd); + if (!pmd) + return -ENOMEM; + + if (pfn >= max_low_pfn) + continue; + + for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) { + if (pfn >= max_low_pfn) + break; + + /* Map with big pages if possible, otherwise create + * normal page tables. + * NOTE: We can mark everything as executable here + */ + if (cpu_has_pse) { + set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); + pfn += PTRS_PER_PTE; + } else { + pte_t *max_pte; + + pte = resume_one_page_table_init(pmd); + if (!pte) + return -ENOMEM; + + max_pte = pte + PTRS_PER_PTE; + for (; pte < max_pte; pte++, pfn++) { + if (pfn >= max_low_pfn) + break; + + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + } + } + } + } + + return 0; +} + +static inline void resume_init_first_level_page_table(pgd_t *pg_dir) +{ +#ifdef CONFIG_X86_PAE + int i; + + /* Init entries of the first-level page table to the zero page */ + for (i = 0; i < PTRS_PER_PGD; i++) + set_pgd(pg_dir + i, + __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); +#endif +} + +int swsusp_arch_resume(void) +{ + int error; + + resume_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!resume_pg_dir) + return -ENOMEM; + + resume_init_first_level_page_table(resume_pg_dir); + error = resume_physical_mapping_init(resume_pg_dir); + if (error) + return error; + + /* We have got enough memory and from now on we cannot recover */ + restore_image(); + return 0; +} + +/* + * pfn_is_nosave - check if given pfn is in the 'nosave' section + */ + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} diff --git a/kernel/arch/x86/power/hibernate_64.c b/kernel/arch/x86/power/hibernate_64.c new file mode 100644 index 000000000..009947d41 --- /dev/null +++ b/kernel/arch/x86/power/hibernate_64.c @@ -0,0 +1,147 @@ +/* + * Hibernation support for x86-64 + * + * Distribute under GPLv2 + * + * Copyright (c) 2007 Rafael J. Wysocki + * Copyright (c) 2002 Pavel Machek + * Copyright (c) 2001 Patrick Mochel + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* Defined in hibernate_asm_64.S */ +extern asmlinkage __visible int restore_image(void); + +/* + * Address to jump to in the last phase of restore in order to get to the image + * kernel's text (this value is passed in the image header). + */ +unsigned long restore_jump_address __visible; + +/* + * Value of the cr3 register from before the hibernation (this value is passed + * in the image header). + */ +unsigned long restore_cr3 __visible; + +pgd_t *temp_level4_pgt __visible; + +void *relocated_restore_code __visible; + +static void *alloc_pgt_page(void *context) +{ + return (void *)get_safe_page(GFP_ATOMIC); +} + +static int set_up_temporary_mappings(void) +{ + struct x86_mapping_info info = { + .alloc_pgt_page = alloc_pgt_page, + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, + .kernel_mapping = true, + }; + unsigned long mstart, mend; + int result; + int i; + + temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); + if (!temp_level4_pgt) + return -ENOMEM; + + /* It is safe to reuse the original kernel mapping */ + set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), + init_level4_pgt[pgd_index(__START_KERNEL_map)]); + + /* Set up the direct mapping from scratch */ + for (i = 0; i < nr_pfn_mapped; i++) { + mstart = pfn_mapped[i].start << PAGE_SHIFT; + mend = pfn_mapped[i].end << PAGE_SHIFT; + + result = kernel_ident_mapping_init(&info, temp_level4_pgt, + mstart, mend); + + if (result) + return result; + } + + return 0; +} + +int swsusp_arch_resume(void) +{ + int error; + + /* We have got enough memory and from now on we cannot recover */ + if ((error = set_up_temporary_mappings())) + return error; + + relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + memcpy(relocated_restore_code, &core_restore_code, + &restore_registers - &core_restore_code); + + restore_image(); + return 0; +} + +/* + * pfn_is_nosave - check if given pfn is in the 'nosave' section + */ + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; + return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); +} + +struct restore_data_record { + unsigned long jump_address; + unsigned long cr3; + unsigned long magic; +}; + +#define RESTORE_MAGIC 0x0123456789ABCDEFUL + +/** + * arch_hibernation_header_save - populate the architecture specific part + * of a hibernation image header + * @addr: address to save the data at + */ +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct restore_data_record *rdr = addr; + + if (max_size < sizeof(struct restore_data_record)) + return -EOVERFLOW; + rdr->jump_address = restore_jump_address; + rdr->cr3 = restore_cr3; + rdr->magic = RESTORE_MAGIC; + return 0; +} + +/** + * arch_hibernation_header_restore - read the architecture specific data + * from the hibernation image header + * @addr: address to read the data from + */ +int arch_hibernation_header_restore(void *addr) +{ + struct restore_data_record *rdr = addr; + + restore_jump_address = rdr->jump_address; + restore_cr3 = rdr->cr3; + return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; +} diff --git a/kernel/arch/x86/power/hibernate_asm_32.S b/kernel/arch/x86/power/hibernate_asm_32.S new file mode 100644 index 000000000..1d0fa0e24 --- /dev/null +++ b/kernel/arch/x86/power/hibernate_asm_32.S @@ -0,0 +1,84 @@ +/* + * This may not use any stack, nor any variable that is not "NoSave": + * + * Its rewriting one kernel image with another. What is stack in "old" + * image could very well be data page in "new" image, and overwriting + * your own stack under you is bad idea. + */ + +#include +#include +#include +#include +#include + +.text + +ENTRY(swsusp_arch_suspend) + movl %esp, saved_context_esp + movl %ebx, saved_context_ebx + movl %ebp, saved_context_ebp + movl %esi, saved_context_esi + movl %edi, saved_context_edi + pushfl + popl saved_context_eflags + + call swsusp_save + ret + +ENTRY(restore_image) + movl mmu_cr4_features, %ecx + movl resume_pg_dir, %eax + subl $__PAGE_OFFSET, %eax + movl %eax, %cr3 + + jecxz 1f # cr4 Pentium and higher, skip if zero + andl $~(X86_CR4_PGE), %ecx + movl %ecx, %cr4; # turn off PGE + movl %cr3, %eax; # flush TLB + movl %eax, %cr3 +1: + movl restore_pblist, %edx + .p2align 4,,7 + +copy_loop: + testl %edx, %edx + jz done + + movl pbe_address(%edx), %esi + movl pbe_orig_address(%edx), %edi + + movl $1024, %ecx + rep + movsl + + movl pbe_next(%edx), %edx + jmp copy_loop + .p2align 4,,7 + +done: + /* go back to the original page tables */ + movl $swapper_pg_dir, %eax + subl $__PAGE_OFFSET, %eax + movl %eax, %cr3 + movl mmu_cr4_features, %ecx + jecxz 1f # cr4 Pentium and higher, skip if zero + movl %ecx, %cr4; # turn PGE back on +1: + + movl saved_context_esp, %esp + movl saved_context_ebp, %ebp + movl saved_context_ebx, %ebx + movl saved_context_esi, %esi + movl saved_context_edi, %edi + + pushl saved_context_eflags + popfl + + /* Saved in save_processor_state. */ + movl $saved_context, %eax + lgdt saved_context_gdt_desc(%eax) + + xorl %eax, %eax + + ret diff --git a/kernel/arch/x86/power/hibernate_asm_64.S b/kernel/arch/x86/power/hibernate_asm_64.S new file mode 100644 index 000000000..3c4469a7a --- /dev/null +++ b/kernel/arch/x86/power/hibernate_asm_64.S @@ -0,0 +1,150 @@ +/* + * Hibernation support for x86-64 + * + * Distribute under GPLv2. + * + * Copyright 2007 Rafael J. Wysocki + * Copyright 2005 Andi Kleen + * Copyright 2004 Pavel Machek + * + * swsusp_arch_resume must not use any stack or any nonlocal variables while + * copying pages: + * + * Its rewriting one kernel image with another. What is stack in "old" + * image could very well be data page in "new" image, and overwriting + * your own stack under you is bad idea. + */ + + .text +#include +#include +#include +#include +#include + +ENTRY(swsusp_arch_suspend) + movq $saved_context, %rax + movq %rsp, pt_regs_sp(%rax) + movq %rbp, pt_regs_bp(%rax) + movq %rsi, pt_regs_si(%rax) + movq %rdi, pt_regs_di(%rax) + movq %rbx, pt_regs_bx(%rax) + movq %rcx, pt_regs_cx(%rax) + movq %rdx, pt_regs_dx(%rax) + movq %r8, pt_regs_r8(%rax) + movq %r9, pt_regs_r9(%rax) + movq %r10, pt_regs_r10(%rax) + movq %r11, pt_regs_r11(%rax) + movq %r12, pt_regs_r12(%rax) + movq %r13, pt_regs_r13(%rax) + movq %r14, pt_regs_r14(%rax) + movq %r15, pt_regs_r15(%rax) + pushfq + popq pt_regs_flags(%rax) + + /* save the address of restore_registers */ + movq $restore_registers, %rax + movq %rax, restore_jump_address(%rip) + /* save cr3 */ + movq %cr3, %rax + movq %rax, restore_cr3(%rip) + + call swsusp_save + ret + +ENTRY(restore_image) + /* switch to temporary page tables */ + movq $__PAGE_OFFSET, %rdx + movq temp_level4_pgt(%rip), %rax + subq %rdx, %rax + movq %rax, %cr3 + /* Flush TLB */ + movq mmu_cr4_features(%rip), %rax + movq %rax, %rdx + andq $~(X86_CR4_PGE), %rdx + movq %rdx, %cr4; # turn off PGE + movq %cr3, %rcx; # flush TLB + movq %rcx, %cr3; + movq %rax, %cr4; # turn PGE back on + + /* prepare to jump to the image kernel */ + movq restore_jump_address(%rip), %rax + movq restore_cr3(%rip), %rbx + + /* prepare to copy image data to their original locations */ + movq restore_pblist(%rip), %rdx + movq relocated_restore_code(%rip), %rcx + jmpq *%rcx + + /* code below has been relocated to a safe page */ +ENTRY(core_restore_code) +loop: + testq %rdx, %rdx + jz done + + /* get addresses from the pbe and copy the page */ + movq pbe_address(%rdx), %rsi + movq pbe_orig_address(%rdx), %rdi + movq $(PAGE_SIZE >> 3), %rcx + rep + movsq + + /* progress to the next pbe */ + movq pbe_next(%rdx), %rdx + jmp loop +done: + /* jump to the restore_registers address from the image header */ + jmpq *%rax + /* + * NOTE: This assumes that the boot kernel's text mapping covers the + * image kernel's page containing restore_registers and the address of + * this page is the same as in the image kernel's text mapping (it + * should always be true, because the text mapping is linear, starting + * from 0, and is supposed to cover the entire kernel text for every + * kernel). + * + * code below belongs to the image kernel + */ + +ENTRY(restore_registers) + /* go back to the original page tables */ + movq %rbx, %cr3 + + /* Flush TLB, including "global" things (vmalloc) */ + movq mmu_cr4_features(%rip), %rax + movq %rax, %rdx + andq $~(X86_CR4_PGE), %rdx + movq %rdx, %cr4; # turn off PGE + movq %cr3, %rcx; # flush TLB + movq %rcx, %cr3 + movq %rax, %cr4; # turn PGE back on + + /* We don't restore %rax, it must be 0 anyway */ + movq $saved_context, %rax + movq pt_regs_sp(%rax), %rsp + movq pt_regs_bp(%rax), %rbp + movq pt_regs_si(%rax), %rsi + movq pt_regs_di(%rax), %rdi + movq pt_regs_bx(%rax), %rbx + movq pt_regs_cx(%rax), %rcx + movq pt_regs_dx(%rax), %rdx + movq pt_regs_r8(%rax), %r8 + movq pt_regs_r9(%rax), %r9 + movq pt_regs_r10(%rax), %r10 + movq pt_regs_r11(%rax), %r11 + movq pt_regs_r12(%rax), %r12 + movq pt_regs_r13(%rax), %r13 + movq pt_regs_r14(%rax), %r14 + movq pt_regs_r15(%rax), %r15 + pushq pt_regs_flags(%rax) + popfq + + /* Saved in save_processor_state. */ + lgdt saved_context_gdt_desc(%rax) + + xorq %rax, %rax + + /* tell the hibernation core that we've just restored the memory */ + movq %rax, in_suspend(%rip) + + ret -- cgit 1.2.3-korg