diff options
Diffstat (limited to 'kernel/arch/x86/boot/compressed')
-rw-r--r-- | kernel/arch/x86/boot/compressed/.gitignore | 6 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/Makefile | 102 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/aslr.c | 339 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/cmdline.c | 33 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/cpuflags.c | 12 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/early_serial_console.c | 5 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/eboot.c | 1515 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/eboot.h | 106 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/efi_stub_32.S | 86 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/efi_stub_64.S | 5 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/efi_thunk_64.S | 196 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/head_32.S | 248 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/head_64.S | 480 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/misc.c | 437 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/misc.h | 88 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/mkpiggy.c | 104 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/string.c | 41 | ||||
-rw-r--r-- | kernel/arch/x86/boot/compressed/vmlinux.lds.S | 74 |
18 files changed, 3877 insertions, 0 deletions
diff --git a/kernel/arch/x86/boot/compressed/.gitignore b/kernel/arch/x86/boot/compressed/.gitignore new file mode 100644 index 000000000..4a46fab71 --- /dev/null +++ b/kernel/arch/x86/boot/compressed/.gitignore @@ -0,0 +1,6 @@ +relocs +vmlinux.bin.all +vmlinux.relocs +vmlinux.lds +mkpiggy +piggy.S diff --git a/kernel/arch/x86/boot/compressed/Makefile b/kernel/arch/x86/boot/compressed/Makefile new file mode 100644 index 000000000..0a291cdfa --- /dev/null +++ b/kernel/arch/x86/boot/compressed/Makefile @@ -0,0 +1,102 @@ +# +# linux/arch/x86/boot/compressed/Makefile +# +# create a compressed vmlinux image from the original vmlinux +# +# vmlinuz is: +# decompression code (*.o) +# asm globals (piggy.S), including: +# vmlinux.bin.(gz|bz2|lzma|...) +# +# vmlinux.bin is: +# vmlinux stripped of debugging and comments +# vmlinux.bin.all is: +# vmlinux.bin + vmlinux.relocs +# vmlinux.bin.(gz|bz2|lzma|...) is: +# (see scripts/Makefile.lib size_append) +# compressed vmlinux.bin.all + u32 size of vmlinux.bin.all + +KASAN_SANITIZE := n + +targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ + vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 + +KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 +KBUILD_CFLAGS += -fno-strict-aliasing -fPIC +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +cflags-$(CONFIG_X86_32) := -march=i386 +cflags-$(CONFIG_X86_64) := -mcmodel=small +KBUILD_CFLAGS += $(cflags-y) +KBUILD_CFLAGS += -mno-mmx -mno-sse +KBUILD_CFLAGS += $(call cc-option,-ffreestanding) +KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) + +KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ +GCOV_PROFILE := n + +LDFLAGS := -m elf_$(UTS_MACHINE) +LDFLAGS_vmlinux := -T + +hostprogs-y := mkpiggy +HOST_EXTRACFLAGS += -I$(srctree)/tools/include + +vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ + $(obj)/string.o $(obj)/cmdline.o \ + $(obj)/piggy.o $(obj)/cpuflags.o + +vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o +vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o + +$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone + +vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ + $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o + +$(obj)/vmlinux: $(vmlinux-objs-y) FORCE + $(call if_changed,ld) + @: + +OBJCOPYFLAGS_vmlinux.bin := -R .comment -S +$(obj)/vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs + +CMD_RELOCS = arch/x86/tools/relocs +quiet_cmd_relocs = RELOCS $@ + cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $< +$(obj)/vmlinux.relocs: vmlinux FORCE + $(call if_changed,relocs) + +vmlinux.bin.all-y := $(obj)/vmlinux.bin +vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs + +$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE + $(call if_changed,gzip) +$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE + $(call if_changed,bzip2) +$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lzma) +$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE + $(call if_changed,xzkern) +$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lzo) +$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE + $(call if_changed,lz4) + +suffix-$(CONFIG_KERNEL_GZIP) := gz +suffix-$(CONFIG_KERNEL_BZIP2) := bz2 +suffix-$(CONFIG_KERNEL_LZMA) := lzma +suffix-$(CONFIG_KERNEL_XZ) := xz +suffix-$(CONFIG_KERNEL_LZO) := lzo +suffix-$(CONFIG_KERNEL_LZ4) := lz4 + +RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \ + $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh) +quiet_cmd_mkpiggy = MKPIGGY $@ + cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false ) + +targets += piggy.S +$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE + $(call if_changed,mkpiggy) diff --git a/kernel/arch/x86/boot/compressed/aslr.c b/kernel/arch/x86/boot/compressed/aslr.c new file mode 100644 index 000000000..d7b1f655b --- /dev/null +++ b/kernel/arch/x86/boot/compressed/aslr.c @@ -0,0 +1,339 @@ +#include "misc.h" + +#include <asm/msr.h> +#include <asm/archrandom.h> +#include <asm/e820.h> + +#include <generated/compile.h> +#include <linux/module.h> +#include <linux/uts.h> +#include <linux/utsname.h> +#include <generated/utsrelease.h> + +/* Simplified build-specific string for starting entropy. */ +static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" + LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; + +#define I8254_PORT_CONTROL 0x43 +#define I8254_PORT_COUNTER0 0x40 +#define I8254_CMD_READBACK 0xC0 +#define I8254_SELECT_COUNTER0 0x02 +#define I8254_STATUS_NOTREADY 0x40 +static inline u16 i8254(void) +{ + u16 status, timer; + + do { + outb(I8254_PORT_CONTROL, + I8254_CMD_READBACK | I8254_SELECT_COUNTER0); + status = inb(I8254_PORT_COUNTER0); + timer = inb(I8254_PORT_COUNTER0); + timer |= inb(I8254_PORT_COUNTER0) << 8; + } while (status & I8254_STATUS_NOTREADY); + + return timer; +} + +static unsigned long rotate_xor(unsigned long hash, const void *area, + size_t size) +{ + size_t i; + unsigned long *ptr = (unsigned long *)area; + + for (i = 0; i < size / sizeof(hash); i++) { + /* Rotate by odd number of bits and XOR. */ + hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7); + hash ^= ptr[i]; + } + + return hash; +} + +/* Attempt to create a simple but unpredictable starting entropy. */ +static unsigned long get_random_boot(void) +{ + unsigned long hash = 0; + + hash = rotate_xor(hash, build_str, sizeof(build_str)); + hash = rotate_xor(hash, real_mode, sizeof(*real_mode)); + + return hash; +} + +static unsigned long get_random_long(void) +{ +#ifdef CONFIG_X86_64 + const unsigned long mix_const = 0x5d6008cbf3848dd3UL; +#else + const unsigned long mix_const = 0x3f39e593UL; +#endif + unsigned long raw, random = get_random_boot(); + bool use_i8254 = true; + + debug_putstr("KASLR using"); + + if (has_cpuflag(X86_FEATURE_RDRAND)) { + debug_putstr(" RDRAND"); + if (rdrand_long(&raw)) { + random ^= raw; + use_i8254 = false; + } + } + + if (has_cpuflag(X86_FEATURE_TSC)) { + debug_putstr(" RDTSC"); + rdtscll(raw); + + random ^= raw; + use_i8254 = false; + } + + if (use_i8254) { + debug_putstr(" i8254"); + random ^= i8254(); + } + + /* Circular multiply for better bit diffusion */ + asm("mul %3" + : "=a" (random), "=d" (raw) + : "a" (random), "rm" (mix_const)); + random += raw; + + debug_putstr("...\n"); + + return random; +} + +struct mem_vector { + unsigned long start; + unsigned long size; +}; + +#define MEM_AVOID_MAX 5 +static struct mem_vector mem_avoid[MEM_AVOID_MAX]; + +static bool mem_contains(struct mem_vector *region, struct mem_vector *item) +{ + /* Item at least partially before region. */ + if (item->start < region->start) + return false; + /* Item at least partially after region. */ + if (item->start + item->size > region->start + region->size) + return false; + return true; +} + +static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) +{ + /* Item one is entirely before item two. */ + if (one->start + one->size <= two->start) + return false; + /* Item one is entirely after item two. */ + if (one->start >= two->start + two->size) + return false; + return true; +} + +static void mem_avoid_init(unsigned long input, unsigned long input_size, + unsigned long output, unsigned long output_size) +{ + u64 initrd_start, initrd_size; + u64 cmd_line, cmd_line_size; + unsigned long unsafe, unsafe_len; + char *ptr; + + /* + * Avoid the region that is unsafe to overlap during + * decompression (see calculations at top of misc.c). + */ + unsafe_len = (output_size >> 12) + 32768 + 18; + unsafe = (unsigned long)input + input_size - unsafe_len; + mem_avoid[0].start = unsafe; + mem_avoid[0].size = unsafe_len; + + /* Avoid initrd. */ + initrd_start = (u64)real_mode->ext_ramdisk_image << 32; + initrd_start |= real_mode->hdr.ramdisk_image; + initrd_size = (u64)real_mode->ext_ramdisk_size << 32; + initrd_size |= real_mode->hdr.ramdisk_size; + mem_avoid[1].start = initrd_start; + mem_avoid[1].size = initrd_size; + + /* Avoid kernel command line. */ + cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32; + cmd_line |= real_mode->hdr.cmd_line_ptr; + /* Calculate size of cmd_line. */ + ptr = (char *)(unsigned long)cmd_line; + for (cmd_line_size = 0; ptr[cmd_line_size++]; ) + ; + mem_avoid[2].start = cmd_line; + mem_avoid[2].size = cmd_line_size; + + /* Avoid heap memory. */ + mem_avoid[3].start = (unsigned long)free_mem_ptr; + mem_avoid[3].size = BOOT_HEAP_SIZE; + + /* Avoid stack memory. */ + mem_avoid[4].start = (unsigned long)free_mem_end_ptr; + mem_avoid[4].size = BOOT_STACK_SIZE; +} + +/* Does this memory vector overlap a known avoided area? */ +static bool mem_avoid_overlap(struct mem_vector *img) +{ + int i; + struct setup_data *ptr; + + for (i = 0; i < MEM_AVOID_MAX; i++) { + if (mem_overlaps(img, &mem_avoid[i])) + return true; + } + + /* Avoid all entries in the setup_data linked list. */ + ptr = (struct setup_data *)(unsigned long)real_mode->hdr.setup_data; + while (ptr) { + struct mem_vector avoid; + + avoid.start = (unsigned long)ptr; + avoid.size = sizeof(*ptr) + ptr->len; + + if (mem_overlaps(img, &avoid)) + return true; + + ptr = (struct setup_data *)(unsigned long)ptr->next; + } + + return false; +} + +static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / + CONFIG_PHYSICAL_ALIGN]; +static unsigned long slot_max; + +static void slots_append(unsigned long addr) +{ + /* Overflowing the slots list should be impossible. */ + if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET / + CONFIG_PHYSICAL_ALIGN) + return; + + slots[slot_max++] = addr; +} + +static unsigned long slots_fetch_random(void) +{ + /* Handle case of no slots stored. */ + if (slot_max == 0) + return 0; + + return slots[get_random_long() % slot_max]; +} + +static void process_e820_entry(struct e820entry *entry, + unsigned long minimum, + unsigned long image_size) +{ + struct mem_vector region, img; + + /* Skip non-RAM entries. */ + if (entry->type != E820_RAM) + return; + + /* Ignore entries entirely above our maximum. */ + if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + return; + + /* Ignore entries entirely below our minimum. */ + if (entry->addr + entry->size < minimum) + return; + + region.start = entry->addr; + region.size = entry->size; + + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; + + /* Potentially raise address to meet alignment requirements. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + + /* Did we raise the address above the bounds of this e820 region? */ + if (region.start > entry->addr + entry->size) + return; + + /* Reduce size by any delta from the original address. */ + region.size -= region.start - entry->addr; + + /* Reduce maximum size to fit end of image within maximum limit. */ + if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET) + region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start; + + /* Walk each aligned slot and check for avoided areas. */ + for (img.start = region.start, img.size = image_size ; + mem_contains(®ion, &img) ; + img.start += CONFIG_PHYSICAL_ALIGN) { + if (mem_avoid_overlap(&img)) + continue; + slots_append(img.start); + } +} + +static unsigned long find_random_addr(unsigned long minimum, + unsigned long size) +{ + int i; + unsigned long addr; + + /* Make sure minimum is aligned. */ + minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); + + /* Verify potential e820 positions, appending to slots list. */ + for (i = 0; i < real_mode->e820_entries; i++) { + process_e820_entry(&real_mode->e820_map[i], minimum, size); + } + + return slots_fetch_random(); +} + +unsigned char *choose_kernel_location(struct boot_params *boot_params, + unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + unsigned long choice = (unsigned long)output; + unsigned long random; + +#ifdef CONFIG_HIBERNATION + if (!cmdline_find_option_bool("kaslr")) { + debug_putstr("KASLR disabled by default...\n"); + goto out; + } +#else + if (cmdline_find_option_bool("nokaslr")) { + debug_putstr("KASLR disabled by cmdline...\n"); + goto out; + } +#endif + + boot_params->hdr.loadflags |= KASLR_FLAG; + + /* Record the various known unsafe memory ranges. */ + mem_avoid_init((unsigned long)input, input_size, + (unsigned long)output, output_size); + + /* Walk e820 and find a random address. */ + random = find_random_addr(choice, output_size); + if (!random) { + debug_putstr("KASLR could not find suitable E820 region...\n"); + goto out; + } + + /* Always enforce the minimum. */ + if (random < choice) + goto out; + + choice = random; +out: + return (unsigned char *)choice; +} diff --git a/kernel/arch/x86/boot/compressed/cmdline.c b/kernel/arch/x86/boot/compressed/cmdline.c new file mode 100644 index 000000000..b68e3033e --- /dev/null +++ b/kernel/arch/x86/boot/compressed/cmdline.c @@ -0,0 +1,33 @@ +#include "misc.h" + +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE + +static unsigned long fs; +static inline void set_fs(unsigned long seg) +{ + fs = seg << 4; /* shift it back */ +} +typedef unsigned long addr_t; +static inline char rdfs8(addr_t addr) +{ + return *((char *)(fs + addr)); +} +#include "../cmdline.c" +static unsigned long get_cmd_line_ptr(void) +{ + unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr; + + cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32; + + return cmd_line_ptr; +} +int cmdline_find_option(const char *option, char *buffer, int bufsize) +{ + return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize); +} +int cmdline_find_option_bool(const char *option) +{ + return __cmdline_find_option_bool(get_cmd_line_ptr(), option); +} + +#endif diff --git a/kernel/arch/x86/boot/compressed/cpuflags.c b/kernel/arch/x86/boot/compressed/cpuflags.c new file mode 100644 index 000000000..aa3134661 --- /dev/null +++ b/kernel/arch/x86/boot/compressed/cpuflags.c @@ -0,0 +1,12 @@ +#ifdef CONFIG_RANDOMIZE_BASE + +#include "../cpuflags.c" + +bool has_cpuflag(int flag) +{ + get_cpuflags(); + + return test_bit(flag, cpu.flags); +} + +#endif diff --git a/kernel/arch/x86/boot/compressed/early_serial_console.c b/kernel/arch/x86/boot/compressed/early_serial_console.c new file mode 100644 index 000000000..261e81fb9 --- /dev/null +++ b/kernel/arch/x86/boot/compressed/early_serial_console.c @@ -0,0 +1,5 @@ +#include "misc.h" + +int early_serial_base; + +#include "../early_serial_console.c" diff --git a/kernel/arch/x86/boot/compressed/eboot.c b/kernel/arch/x86/boot/compressed/eboot.c new file mode 100644 index 000000000..48304b89b --- /dev/null +++ b/kernel/arch/x86/boot/compressed/eboot.c @@ -0,0 +1,1515 @@ +/* ----------------------------------------------------------------------- + * + * Copyright 2011 Intel Corporation; author Matt Fleming + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. + * + * ----------------------------------------------------------------------- */ + +#include <linux/efi.h> +#include <linux/pci.h> +#include <asm/efi.h> +#include <asm/setup.h> +#include <asm/desc.h> + +#include "../string.h" +#include "eboot.h" + +static efi_system_table_t *sys_table; + +static struct efi_config *efi_early; + +__pure const struct efi_config *__efi_early(void) +{ + return efi_early; +} + +#define BOOT_SERVICES(bits) \ +static void setup_boot_services##bits(struct efi_config *c) \ +{ \ + efi_system_table_##bits##_t *table; \ + efi_boot_services_##bits##_t *bt; \ + \ + table = (typeof(table))sys_table; \ + \ + c->text_output = table->con_out; \ + \ + bt = (typeof(bt))(unsigned long)(table->boottime); \ + \ + c->allocate_pool = bt->allocate_pool; \ + c->allocate_pages = bt->allocate_pages; \ + c->get_memory_map = bt->get_memory_map; \ + c->free_pool = bt->free_pool; \ + c->free_pages = bt->free_pages; \ + c->locate_handle = bt->locate_handle; \ + c->handle_protocol = bt->handle_protocol; \ + c->exit_boot_services = bt->exit_boot_services; \ +} +BOOT_SERVICES(32); +BOOT_SERVICES(64); + +void efi_char16_printk(efi_system_table_t *, efi_char16_t *); + +static efi_status_t +__file_size32(void *__fh, efi_char16_t *filename_16, + void **handle, u64 *file_sz) +{ + efi_file_handle_32_t *h, *fh = __fh; + efi_file_info_t *info; + efi_status_t status; + efi_guid_t info_guid = EFI_FILE_INFO_ID; + u32 info_sz; + + status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16, + EFI_FILE_MODE_READ, (u64)0); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to open file: "); + efi_char16_printk(sys_table, filename_16); + efi_printk(sys_table, "\n"); + return status; + } + + *handle = h; + + info_sz = 0; + status = efi_early->call((unsigned long)h->get_info, h, &info_guid, + &info_sz, NULL); + if (status != EFI_BUFFER_TOO_SMALL) { + efi_printk(sys_table, "Failed to get file info size\n"); + return status; + } + +grow: + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + info_sz, (void **)&info); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to alloc mem for file info\n"); + return status; + } + + status = efi_early->call((unsigned long)h->get_info, h, &info_guid, + &info_sz, info); + if (status == EFI_BUFFER_TOO_SMALL) { + efi_call_early(free_pool, info); + goto grow; + } + + *file_sz = info->file_size; + efi_call_early(free_pool, info); + + if (status != EFI_SUCCESS) + efi_printk(sys_table, "Failed to get initrd info\n"); + + return status; +} + +static efi_status_t +__file_size64(void *__fh, efi_char16_t *filename_16, + void **handle, u64 *file_sz) +{ + efi_file_handle_64_t *h, *fh = __fh; + efi_file_info_t *info; + efi_status_t status; + efi_guid_t info_guid = EFI_FILE_INFO_ID; + u64 info_sz; + + status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16, + EFI_FILE_MODE_READ, (u64)0); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to open file: "); + efi_char16_printk(sys_table, filename_16); + efi_printk(sys_table, "\n"); + return status; + } + + *handle = h; + + info_sz = 0; + status = efi_early->call((unsigned long)h->get_info, h, &info_guid, + &info_sz, NULL); + if (status != EFI_BUFFER_TOO_SMALL) { + efi_printk(sys_table, "Failed to get file info size\n"); + return status; + } + +grow: + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + info_sz, (void **)&info); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to alloc mem for file info\n"); + return status; + } + + status = efi_early->call((unsigned long)h->get_info, h, &info_guid, + &info_sz, info); + if (status == EFI_BUFFER_TOO_SMALL) { + efi_call_early(free_pool, info); + goto grow; + } + + *file_sz = info->file_size; + efi_call_early(free_pool, info); + + if (status != EFI_SUCCESS) + efi_printk(sys_table, "Failed to get initrd info\n"); + + return status; +} +efi_status_t +efi_file_size(efi_system_table_t *sys_table, void *__fh, + efi_char16_t *filename_16, void **handle, u64 *file_sz) +{ + if (efi_early->is64) + return __file_size64(__fh, filename_16, handle, file_sz); + + return __file_size32(__fh, filename_16, handle, file_sz); +} + +efi_status_t +efi_file_read(void *handle, unsigned long *size, void *addr) +{ + unsigned long func; + + if (efi_early->is64) { + efi_file_handle_64_t *fh = handle; + + func = (unsigned long)fh->read; + return efi_early->call(func, handle, size, addr); + } else { + efi_file_handle_32_t *fh = handle; + + func = (unsigned long)fh->read; + return efi_early->call(func, handle, size, addr); + } +} + +efi_status_t efi_file_close(void *handle) +{ + if (efi_early->is64) { + efi_file_handle_64_t *fh = handle; + + return efi_early->call((unsigned long)fh->close, handle); + } else { + efi_file_handle_32_t *fh = handle; + + return efi_early->call((unsigned long)fh->close, handle); + } +} + +static inline efi_status_t __open_volume32(void *__image, void **__fh) +{ + efi_file_io_interface_t *io; + efi_loaded_image_32_t *image = __image; + efi_file_handle_32_t *fh; + efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; + efi_status_t status; + void *handle = (void *)(unsigned long)image->device_handle; + unsigned long func; + + status = efi_call_early(handle_protocol, handle, + &fs_proto, (void **)&io); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to handle fs_proto\n"); + return status; + } + + func = (unsigned long)io->open_volume; + status = efi_early->call(func, io, &fh); + if (status != EFI_SUCCESS) + efi_printk(sys_table, "Failed to open volume\n"); + + *__fh = fh; + return status; +} + +static inline efi_status_t __open_volume64(void *__image, void **__fh) +{ + efi_file_io_interface_t *io; + efi_loaded_image_64_t *image = __image; + efi_file_handle_64_t *fh; + efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; + efi_status_t status; + void *handle = (void *)(unsigned long)image->device_handle; + unsigned long func; + + status = efi_call_early(handle_protocol, handle, + &fs_proto, (void **)&io); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to handle fs_proto\n"); + return status; + } + + func = (unsigned long)io->open_volume; + status = efi_early->call(func, io, &fh); + if (status != EFI_SUCCESS) + efi_printk(sys_table, "Failed to open volume\n"); + + *__fh = fh; + return status; +} + +efi_status_t +efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) +{ + if (efi_early->is64) + return __open_volume64(__image, __fh); + + return __open_volume32(__image, __fh); +} + +void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) +{ + unsigned long output_string; + size_t offset; + + if (efi_early->is64) { + struct efi_simple_text_output_protocol_64 *out; + u64 *func; + + offset = offsetof(typeof(*out), output_string); + output_string = efi_early->text_output + offset; + out = (typeof(out))(unsigned long)efi_early->text_output; + func = (u64 *)output_string; + + efi_early->call(*func, out, str); + } else { + struct efi_simple_text_output_protocol_32 *out; + u32 *func; + + offset = offsetof(typeof(*out), output_string); + output_string = efi_early->text_output + offset; + out = (typeof(out))(unsigned long)efi_early->text_output; + func = (u32 *)output_string; + + efi_early->call(*func, out, str); + } +} + +static void find_bits(unsigned long mask, u8 *pos, u8 *size) +{ + u8 first, len; + + first = 0; + len = 0; + + if (mask) { + while (!(mask & 0x1)) { + mask = mask >> 1; + first++; + } + + while (mask & 0x1) { + mask = mask >> 1; + len++; + } + } + + *pos = first; + *size = len; +} + +static efi_status_t +__setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) +{ + struct pci_setup_rom *rom = NULL; + efi_status_t status; + unsigned long size; + uint64_t attributes; + + status = efi_early->call(pci->attributes, pci, + EfiPciIoAttributeOperationGet, 0, 0, + &attributes); + if (status != EFI_SUCCESS) + return status; + + if (!pci->romimage || !pci->romsize) + return EFI_INVALID_PARAMETER; + + size = pci->romsize + sizeof(*rom); + + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to alloc mem for rom\n"); + return status; + } + + memset(rom, 0, sizeof(*rom)); + + rom->data.type = SETUP_PCI; + rom->data.len = size - sizeof(struct setup_data); + rom->data.next = 0; + rom->pcilen = pci->romsize; + *__rom = rom; + + status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, + PCI_VENDOR_ID, 1, &(rom->vendor)); + + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to read rom->vendor\n"); + goto free_struct; + } + + status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, + PCI_DEVICE_ID, 1, &(rom->devid)); + + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to read rom->devid\n"); + goto free_struct; + } + + status = efi_early->call(pci->get_location, pci, &(rom->segment), + &(rom->bus), &(rom->device), &(rom->function)); + + if (status != EFI_SUCCESS) + goto free_struct; + + memcpy(rom->romdata, pci->romimage, pci->romsize); + return status; + +free_struct: + efi_call_early(free_pool, rom); + return status; +} + +static void +setup_efi_pci32(struct boot_params *params, void **pci_handle, + unsigned long size) +{ + efi_pci_io_protocol_32 *pci = NULL; + efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; + u32 *handles = (u32 *)(unsigned long)pci_handle; + efi_status_t status; + unsigned long nr_pci; + struct setup_data *data; + int i; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && data->next) + data = (struct setup_data *)(unsigned long)data->next; + + nr_pci = size / sizeof(u32); + for (i = 0; i < nr_pci; i++) { + struct pci_setup_rom *rom = NULL; + u32 h = handles[i]; + + status = efi_call_early(handle_protocol, h, + &pci_proto, (void **)&pci); + + if (status != EFI_SUCCESS) + continue; + + if (!pci) + continue; + + status = __setup_efi_pci32(pci, &rom); + if (status != EFI_SUCCESS) + continue; + + if (data) + data->next = (unsigned long)rom; + else + params->hdr.setup_data = (unsigned long)rom; + + data = (struct setup_data *)rom; + + } +} + +static efi_status_t +__setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom) +{ + struct pci_setup_rom *rom; + efi_status_t status; + unsigned long size; + uint64_t attributes; + + status = efi_early->call(pci->attributes, pci, + EfiPciIoAttributeOperationGet, 0, + &attributes); + if (status != EFI_SUCCESS) + return status; + + if (!pci->romimage || !pci->romsize) + return EFI_INVALID_PARAMETER; + + size = pci->romsize + sizeof(*rom); + + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to alloc mem for rom\n"); + return status; + } + + rom->data.type = SETUP_PCI; + rom->data.len = size - sizeof(struct setup_data); + rom->data.next = 0; + rom->pcilen = pci->romsize; + *__rom = rom; + + status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, + PCI_VENDOR_ID, 1, &(rom->vendor)); + + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to read rom->vendor\n"); + goto free_struct; + } + + status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, + PCI_DEVICE_ID, 1, &(rom->devid)); + + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to read rom->devid\n"); + goto free_struct; + } + + status = efi_early->call(pci->get_location, pci, &(rom->segment), + &(rom->bus), &(rom->device), &(rom->function)); + + if (status != EFI_SUCCESS) + goto free_struct; + + memcpy(rom->romdata, pci->romimage, pci->romsize); + return status; + +free_struct: + efi_call_early(free_pool, rom); + return status; + +} + +static void +setup_efi_pci64(struct boot_params *params, void **pci_handle, + unsigned long size) +{ + efi_pci_io_protocol_64 *pci = NULL; + efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; + u64 *handles = (u64 *)(unsigned long)pci_handle; + efi_status_t status; + unsigned long nr_pci; + struct setup_data *data; + int i; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && data->next) + data = (struct setup_data *)(unsigned long)data->next; + + nr_pci = size / sizeof(u64); + for (i = 0; i < nr_pci; i++) { + struct pci_setup_rom *rom = NULL; + u64 h = handles[i]; + + status = efi_call_early(handle_protocol, h, + &pci_proto, (void **)&pci); + + if (status != EFI_SUCCESS) + continue; + + if (!pci) + continue; + + status = __setup_efi_pci64(pci, &rom); + if (status != EFI_SUCCESS) + continue; + + if (data) + data->next = (unsigned long)rom; + else + params->hdr.setup_data = (unsigned long)rom; + + data = (struct setup_data *)rom; + + } +} + +/* + * There's no way to return an informative status from this function, + * because any analysis (and printing of error messages) needs to be + * done directly at the EFI function call-site. + * + * For example, EFI_INVALID_PARAMETER could indicate a bug or maybe we + * just didn't find any PCI devices, but there's no way to tell outside + * the context of the call. + */ +static void setup_efi_pci(struct boot_params *params) +{ + efi_status_t status; + void **pci_handle = NULL; + efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; + unsigned long size = 0; + + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + &pci_proto, NULL, &size, pci_handle); + + if (status == EFI_BUFFER_TOO_SMALL) { + status = efi_call_early(allocate_pool, + EFI_LOADER_DATA, + size, (void **)&pci_handle); + + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to alloc mem for pci_handle\n"); + return; + } + + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, &pci_proto, + NULL, &size, pci_handle); + } + + if (status != EFI_SUCCESS) + goto free_handle; + + if (efi_early->is64) + setup_efi_pci64(params, pci_handle, size); + else + setup_efi_pci32(params, pci_handle, size); + +free_handle: + efi_call_early(free_pool, pci_handle); +} + +static void +setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, + struct efi_pixel_bitmask pixel_info, int pixel_format) +{ + if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { + si->lfb_depth = 32; + si->lfb_linelength = pixels_per_scan_line * 4; + si->red_size = 8; + si->red_pos = 0; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 16; + si->rsvd_size = 8; + si->rsvd_pos = 24; + } else if (pixel_format == PIXEL_BGR_RESERVED_8BIT_PER_COLOR) { + si->lfb_depth = 32; + si->lfb_linelength = pixels_per_scan_line * 4; + si->red_size = 8; + si->red_pos = 16; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 0; + si->rsvd_size = 8; + si->rsvd_pos = 24; + } else if (pixel_format == PIXEL_BIT_MASK) { + find_bits(pixel_info.red_mask, &si->red_pos, &si->red_size); + find_bits(pixel_info.green_mask, &si->green_pos, + &si->green_size); + find_bits(pixel_info.blue_mask, &si->blue_pos, &si->blue_size); + find_bits(pixel_info.reserved_mask, &si->rsvd_pos, + &si->rsvd_size); + si->lfb_depth = si->red_size + si->green_size + + si->blue_size + si->rsvd_size; + si->lfb_linelength = (pixels_per_scan_line * si->lfb_depth) / 8; + } else { + si->lfb_depth = 4; + si->lfb_linelength = si->lfb_width / 2; + si->red_size = 0; + si->red_pos = 0; + si->green_size = 0; + si->green_pos = 0; + si->blue_size = 0; + si->blue_pos = 0; + si->rsvd_size = 0; + si->rsvd_pos = 0; + } +} + +static efi_status_t +__gop_query32(struct efi_graphics_output_protocol_32 *gop32, + struct efi_graphics_output_mode_info **info, + unsigned long *size, u32 *fb_base) +{ + struct efi_graphics_output_protocol_mode_32 *mode; + efi_status_t status; + unsigned long m; + + m = gop32->mode; + mode = (struct efi_graphics_output_protocol_mode_32 *)m; + + status = efi_early->call(gop32->query_mode, gop32, + mode->mode, size, info); + if (status != EFI_SUCCESS) + return status; + + *fb_base = mode->frame_buffer_base; + return status; +} + +static efi_status_t +setup_gop32(struct screen_info *si, efi_guid_t *proto, + unsigned long size, void **gop_handle) +{ + struct efi_graphics_output_protocol_32 *gop32, *first_gop; + unsigned long nr_gops; + u16 width, height; + u32 pixels_per_scan_line; + u32 fb_base; + struct efi_pixel_bitmask pixel_info; + int pixel_format; + efi_status_t status; + u32 *handles = (u32 *)(unsigned long)gop_handle; + int i; + + first_gop = NULL; + gop32 = NULL; + + nr_gops = size / sizeof(u32); + for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_mode_info *info = NULL; + efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; + bool conout_found = false; + void *dummy = NULL; + u32 h = handles[i]; + + status = efi_call_early(handle_protocol, h, + proto, (void **)&gop32); + if (status != EFI_SUCCESS) + continue; + + status = efi_call_early(handle_protocol, h, + &conout_proto, &dummy); + if (status == EFI_SUCCESS) + conout_found = true; + + status = __gop_query32(gop32, &info, &size, &fb_base); + if (status == EFI_SUCCESS && (!first_gop || conout_found)) { + /* + * Systems that use the UEFI Console Splitter may + * provide multiple GOP devices, not all of which are + * backed by real hardware. The workaround is to search + * for a GOP implementing the ConOut protocol, and if + * one isn't found, to just fall back to the first GOP. + */ + width = info->horizontal_resolution; + height = info->vertical_resolution; + pixel_format = info->pixel_format; + pixel_info = info->pixel_information; + pixels_per_scan_line = info->pixels_per_scan_line; + + /* + * Once we've found a GOP supporting ConOut, + * don't bother looking any further. + */ + first_gop = gop32; + if (conout_found) + break; + } + } + + /* Did we find any GOPs? */ + if (!first_gop) + goto out; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_width = width; + si->lfb_height = height; + si->lfb_base = fb_base; + si->pages = 1; + + setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); + + si->lfb_size = si->lfb_linelength * si->lfb_height; + + si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; +out: + return status; +} + +static efi_status_t +__gop_query64(struct efi_graphics_output_protocol_64 *gop64, + struct efi_graphics_output_mode_info **info, + unsigned long *size, u32 *fb_base) +{ + struct efi_graphics_output_protocol_mode_64 *mode; + efi_status_t status; + unsigned long m; + + m = gop64->mode; + mode = (struct efi_graphics_output_protocol_mode_64 *)m; + + status = efi_early->call(gop64->query_mode, gop64, + mode->mode, size, info); + if (status != EFI_SUCCESS) + return status; + + *fb_base = mode->frame_buffer_base; + return status; +} + +static efi_status_t +setup_gop64(struct screen_info *si, efi_guid_t *proto, + unsigned long size, void **gop_handle) +{ + struct efi_graphics_output_protocol_64 *gop64, *first_gop; + unsigned long nr_gops; + u16 width, height; + u32 pixels_per_scan_line; + u32 fb_base; + struct efi_pixel_bitmask pixel_info; + int pixel_format; + efi_status_t status; + u64 *handles = (u64 *)(unsigned long)gop_handle; + int i; + + first_gop = NULL; + gop64 = NULL; + + nr_gops = size / sizeof(u64); + for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_mode_info *info = NULL; + efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; + bool conout_found = false; + void *dummy = NULL; + u64 h = handles[i]; + + status = efi_call_early(handle_protocol, h, + proto, (void **)&gop64); + if (status != EFI_SUCCESS) + continue; + + status = efi_call_early(handle_protocol, h, + &conout_proto, &dummy); + if (status == EFI_SUCCESS) + conout_found = true; + + status = __gop_query64(gop64, &info, &size, &fb_base); + if (status == EFI_SUCCESS && (!first_gop || conout_found)) { + /* + * Systems that use the UEFI Console Splitter may + * provide multiple GOP devices, not all of which are + * backed by real hardware. The workaround is to search + * for a GOP implementing the ConOut protocol, and if + * one isn't found, to just fall back to the first GOP. + */ + width = info->horizontal_resolution; + height = info->vertical_resolution; + pixel_format = info->pixel_format; + pixel_info = info->pixel_information; + pixels_per_scan_line = info->pixels_per_scan_line; + + /* + * Once we've found a GOP supporting ConOut, + * don't bother looking any further. + */ + first_gop = gop64; + if (conout_found) + break; + } + } + + /* Did we find any GOPs? */ + if (!first_gop) + goto out; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_width = width; + si->lfb_height = height; + si->lfb_base = fb_base; + si->pages = 1; + + setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); + + si->lfb_size = si->lfb_linelength * si->lfb_height; + + si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; +out: + return status; +} + +/* + * See if we have Graphics Output Protocol + */ +static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, + unsigned long size) +{ + efi_status_t status; + void **gop_handle = NULL; + + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + size, (void **)&gop_handle); + if (status != EFI_SUCCESS) + return status; + + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + proto, NULL, &size, gop_handle); + if (status != EFI_SUCCESS) + goto free_handle; + + if (efi_early->is64) + status = setup_gop64(si, proto, size, gop_handle); + else + status = setup_gop32(si, proto, size, gop_handle); + +free_handle: + efi_call_early(free_pool, gop_handle); + return status; +} + +static efi_status_t +setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) +{ + struct efi_uga_draw_protocol *uga = NULL, *first_uga; + efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; + unsigned long nr_ugas; + u32 *handles = (u32 *)uga_handle;; + efi_status_t status; + int i; + + first_uga = NULL; + nr_ugas = size / sizeof(u32); + for (i = 0; i < nr_ugas; i++) { + efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; + u32 w, h, depth, refresh; + void *pciio; + u32 handle = handles[i]; + + status = efi_call_early(handle_protocol, handle, + &uga_proto, (void **)&uga); + if (status != EFI_SUCCESS) + continue; + + efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); + + status = efi_early->call((unsigned long)uga->get_mode, uga, + &w, &h, &depth, &refresh); + if (status == EFI_SUCCESS && (!first_uga || pciio)) { + *width = w; + *height = h; + + /* + * Once we've found a UGA supporting PCIIO, + * don't bother looking any further. + */ + if (pciio) + break; + + first_uga = uga; + } + } + + return status; +} + +static efi_status_t +setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height) +{ + struct efi_uga_draw_protocol *uga = NULL, *first_uga; + efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; + unsigned long nr_ugas; + u64 *handles = (u64 *)uga_handle;; + efi_status_t status; + int i; + + first_uga = NULL; + nr_ugas = size / sizeof(u64); + for (i = 0; i < nr_ugas; i++) { + efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; + u32 w, h, depth, refresh; + void *pciio; + u64 handle = handles[i]; + + status = efi_call_early(handle_protocol, handle, + &uga_proto, (void **)&uga); + if (status != EFI_SUCCESS) + continue; + + efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); + + status = efi_early->call((unsigned long)uga->get_mode, uga, + &w, &h, &depth, &refresh); + if (status == EFI_SUCCESS && (!first_uga || pciio)) { + *width = w; + *height = h; + + /* + * Once we've found a UGA supporting PCIIO, + * don't bother looking any further. + */ + if (pciio) + break; + + first_uga = uga; + } + } + + return status; +} + +/* + * See if we have Universal Graphics Adapter (UGA) protocol + */ +static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, + unsigned long size) +{ + efi_status_t status; + u32 width, height; + void **uga_handle = NULL; + + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + size, (void **)&uga_handle); + if (status != EFI_SUCCESS) + return status; + + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + uga_proto, NULL, &size, uga_handle); + if (status != EFI_SUCCESS) + goto free_handle; + + height = 0; + width = 0; + + if (efi_early->is64) + status = setup_uga64(uga_handle, size, &width, &height); + else + status = setup_uga32(uga_handle, size, &width, &height); + + if (!width && !height) + goto free_handle; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_depth = 32; + si->lfb_width = width; + si->lfb_height = height; + + si->red_size = 8; + si->red_pos = 16; + si->green_size = 8; + si->green_pos = 8; + si->blue_size = 8; + si->blue_pos = 0; + si->rsvd_size = 8; + si->rsvd_pos = 24; + +free_handle: + efi_call_early(free_pool, uga_handle); + return status; +} + +void setup_graphics(struct boot_params *boot_params) +{ + efi_guid_t graphics_proto = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID; + struct screen_info *si; + efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; + efi_status_t status; + unsigned long size; + void **gop_handle = NULL; + void **uga_handle = NULL; + + si = &boot_params->screen_info; + memset(si, 0, sizeof(*si)); + + size = 0; + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + &graphics_proto, NULL, &size, gop_handle); + if (status == EFI_BUFFER_TOO_SMALL) + status = setup_gop(si, &graphics_proto, size); + + if (status != EFI_SUCCESS) { + size = 0; + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + &uga_proto, NULL, &size, uga_handle); + if (status == EFI_BUFFER_TOO_SMALL) + setup_uga(si, &uga_proto, size); + } +} + +/* + * Because the x86 boot code expects to be passed a boot_params we + * need to create one ourselves (usually the bootloader would create + * one for us). + * + * The caller is responsible for filling out ->code32_start in the + * returned boot_params. + */ +struct boot_params *make_boot_params(struct efi_config *c) +{ + struct boot_params *boot_params; + struct sys_desc_table *sdt; + struct apm_bios_info *bi; + struct setup_header *hdr; + struct efi_info *efi; + efi_loaded_image_t *image; + void *options, *handle; + efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; + int options_size = 0; + efi_status_t status; + char *cmdline_ptr; + u16 *s2; + u8 *s1; + int i; + unsigned long ramdisk_addr; + unsigned long ramdisk_size; + + efi_early = c; + sys_table = (efi_system_table_t *)(unsigned long)efi_early->table; + handle = (void *)(unsigned long)efi_early->image_handle; + + /* Check if we were booted by the EFI firmware */ + if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + return NULL; + + if (efi_early->is64) + setup_boot_services64(efi_early); + else + setup_boot_services32(efi_early); + + status = efi_call_early(handle_protocol, handle, + &proto, (void *)&image); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); + return NULL; + } + + status = efi_low_alloc(sys_table, 0x4000, 1, + (unsigned long *)&boot_params); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to alloc lowmem for boot params\n"); + return NULL; + } + + memset(boot_params, 0x0, 0x4000); + + hdr = &boot_params->hdr; + efi = &boot_params->efi_info; + bi = &boot_params->apm_bios_info; + sdt = &boot_params->sys_desc_table; + + /* Copy the second sector to boot_params */ + memcpy(&hdr->jump, image->image_base + 512, 512); + + /* + * Fill out some of the header fields ourselves because the + * EFI firmware loader doesn't load the first sector. + */ + hdr->root_flags = 1; + hdr->vid_mode = 0xffff; + hdr->boot_flag = 0xAA55; + + hdr->type_of_loader = 0x21; + + /* Convert unicode cmdline to ascii */ + cmdline_ptr = efi_convert_cmdline(sys_table, image, &options_size); + if (!cmdline_ptr) + goto fail; + hdr->cmd_line_ptr = (unsigned long)cmdline_ptr; + /* Fill in upper bits of command line address, NOP on 32 bit */ + boot_params->ext_cmd_line_ptr = (u64)(unsigned long)cmdline_ptr >> 32; + + hdr->ramdisk_image = 0; + hdr->ramdisk_size = 0; + + /* Clear APM BIOS info */ + memset(bi, 0, sizeof(*bi)); + + memset(sdt, 0, sizeof(*sdt)); + + status = efi_parse_options(cmdline_ptr); + if (status != EFI_SUCCESS) + goto fail2; + + status = handle_cmdline_files(sys_table, image, + (char *)(unsigned long)hdr->cmd_line_ptr, + "initrd=", hdr->initrd_addr_max, + &ramdisk_addr, &ramdisk_size); + + if (status != EFI_SUCCESS && + hdr->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G) { + efi_printk(sys_table, "Trying to load files to higher address\n"); + status = handle_cmdline_files(sys_table, image, + (char *)(unsigned long)hdr->cmd_line_ptr, + "initrd=", -1UL, + &ramdisk_addr, &ramdisk_size); + } + + if (status != EFI_SUCCESS) + goto fail2; + hdr->ramdisk_image = ramdisk_addr & 0xffffffff; + hdr->ramdisk_size = ramdisk_size & 0xffffffff; + boot_params->ext_ramdisk_image = (u64)ramdisk_addr >> 32; + boot_params->ext_ramdisk_size = (u64)ramdisk_size >> 32; + + return boot_params; +fail2: + efi_free(sys_table, options_size, hdr->cmd_line_ptr); +fail: + efi_free(sys_table, 0x4000, (unsigned long)boot_params); + return NULL; +} + +static void add_e820ext(struct boot_params *params, + struct setup_data *e820ext, u32 nr_entries) +{ + struct setup_data *data; + efi_status_t status; + unsigned long size; + + e820ext->type = SETUP_E820_EXT; + e820ext->len = nr_entries * sizeof(struct e820entry); + e820ext->next = 0; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && data->next) + data = (struct setup_data *)(unsigned long)data->next; + + if (data) + data->next = (unsigned long)e820ext; + else + params->hdr.setup_data = (unsigned long)e820ext; +} + +static efi_status_t setup_e820(struct boot_params *params, + struct setup_data *e820ext, u32 e820ext_size) +{ + struct e820entry *e820_map = ¶ms->e820_map[0]; + struct efi_info *efi = ¶ms->efi_info; + struct e820entry *prev = NULL; + u32 nr_entries; + u32 nr_desc; + int i; + + nr_entries = 0; + nr_desc = efi->efi_memmap_size / efi->efi_memdesc_size; + + for (i = 0; i < nr_desc; i++) { + efi_memory_desc_t *d; + unsigned int e820_type = 0; + unsigned long m = efi->efi_memmap; + + d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size)); + switch (d->type) { + case EFI_RESERVED_TYPE: + case EFI_RUNTIME_SERVICES_CODE: + case EFI_RUNTIME_SERVICES_DATA: + case EFI_MEMORY_MAPPED_IO: + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: + case EFI_PAL_CODE: + e820_type = E820_RESERVED; + break; + + case EFI_UNUSABLE_MEMORY: + e820_type = E820_UNUSABLE; + break; + + case EFI_ACPI_RECLAIM_MEMORY: + e820_type = E820_ACPI; + break; + + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + e820_type = E820_RAM; + break; + + case EFI_ACPI_MEMORY_NVS: + e820_type = E820_NVS; + break; + + default: + continue; + } + + /* Merge adjacent mappings */ + if (prev && prev->type == e820_type && + (prev->addr + prev->size) == d->phys_addr) { + prev->size += d->num_pages << 12; + continue; + } + + if (nr_entries == ARRAY_SIZE(params->e820_map)) { + u32 need = (nr_desc - i) * sizeof(struct e820entry) + + sizeof(struct setup_data); + + if (!e820ext || e820ext_size < need) + return EFI_BUFFER_TOO_SMALL; + + /* boot_params map full, switch to e820 extended */ + e820_map = (struct e820entry *)e820ext->data; + } + + e820_map->addr = d->phys_addr; + e820_map->size = d->num_pages << PAGE_SHIFT; + e820_map->type = e820_type; + prev = e820_map++; + nr_entries++; + } + + if (nr_entries > ARRAY_SIZE(params->e820_map)) { + u32 nr_e820ext = nr_entries - ARRAY_SIZE(params->e820_map); + + add_e820ext(params, e820ext, nr_e820ext); + nr_entries -= nr_e820ext; + } + + params->e820_entries = (u8)nr_entries; + + return EFI_SUCCESS; +} + +static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, + u32 *e820ext_size) +{ + efi_status_t status; + unsigned long size; + + size = sizeof(struct setup_data) + + sizeof(struct e820entry) * nr_desc; + + if (*e820ext) { + efi_call_early(free_pool, *e820ext); + *e820ext = NULL; + *e820ext_size = 0; + } + + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + size, (void **)e820ext); + if (status == EFI_SUCCESS) + *e820ext_size = size; + + return status; +} + +static efi_status_t exit_boot(struct boot_params *boot_params, + void *handle, bool is64) +{ + struct efi_info *efi = &boot_params->efi_info; + unsigned long map_sz, key, desc_size; + efi_memory_desc_t *mem_map; + struct setup_data *e820ext; + const char *signature; + __u32 e820ext_size; + __u32 nr_desc, prev_nr_desc; + efi_status_t status; + __u32 desc_version; + bool called_exit = false; + u8 nr_entries; + int i; + + nr_desc = 0; + e820ext = NULL; + e820ext_size = 0; + +get_map: + status = efi_get_memory_map(sys_table, &mem_map, &map_sz, &desc_size, + &desc_version, &key); + + if (status != EFI_SUCCESS) + return status; + + prev_nr_desc = nr_desc; + nr_desc = map_sz / desc_size; + if (nr_desc > prev_nr_desc && + nr_desc > ARRAY_SIZE(boot_params->e820_map)) { + u32 nr_e820ext = nr_desc - ARRAY_SIZE(boot_params->e820_map); + + status = alloc_e820ext(nr_e820ext, &e820ext, &e820ext_size); + if (status != EFI_SUCCESS) + goto free_mem_map; + + efi_call_early(free_pool, mem_map); + goto get_map; /* Allocated memory, get map again */ + } + + signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; + memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); + + efi->efi_systab = (unsigned long)sys_table; + efi->efi_memdesc_size = desc_size; + efi->efi_memdesc_version = desc_version; + efi->efi_memmap = (unsigned long)mem_map; + efi->efi_memmap_size = map_sz; + +#ifdef CONFIG_X86_64 + efi->efi_systab_hi = (unsigned long)sys_table >> 32; + efi->efi_memmap_hi = (unsigned long)mem_map >> 32; +#endif + + /* Might as well exit boot services now */ + status = efi_call_early(exit_boot_services, handle, key); + if (status != EFI_SUCCESS) { + /* + * ExitBootServices() will fail if any of the event + * handlers change the memory map. In which case, we + * must be prepared to retry, but only once so that + * we're guaranteed to exit on repeated failures instead + * of spinning forever. + */ + if (called_exit) + goto free_mem_map; + + called_exit = true; + efi_call_early(free_pool, mem_map); + goto get_map; + } + + /* Historic? */ + boot_params->alt_mem_k = 32 * 1024; + + status = setup_e820(boot_params, e820ext, e820ext_size); + if (status != EFI_SUCCESS) + return status; + + return EFI_SUCCESS; + +free_mem_map: + efi_call_early(free_pool, mem_map); + return status; +} + +/* + * On success we return a pointer to a boot_params structure, and NULL + * on failure. + */ +struct boot_params *efi_main(struct efi_config *c, + struct boot_params *boot_params) +{ + struct desc_ptr *gdt = NULL; + efi_loaded_image_t *image; + struct setup_header *hdr = &boot_params->hdr; + efi_status_t status; + struct desc_struct *desc; + void *handle; + efi_system_table_t *_table; + bool is64; + + efi_early = c; + + _table = (efi_system_table_t *)(unsigned long)efi_early->table; + handle = (void *)(unsigned long)efi_early->image_handle; + is64 = efi_early->is64; + + sys_table = _table; + + /* Check if we were booted by the EFI firmware */ + if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) + goto fail; + + if (is64) + setup_boot_services64(efi_early); + else + setup_boot_services32(efi_early); + + setup_graphics(boot_params); + + setup_efi_pci(boot_params); + + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + sizeof(*gdt), (void **)&gdt); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to alloc mem for gdt structure\n"); + goto fail; + } + + gdt->size = 0x800; + status = efi_low_alloc(sys_table, gdt->size, 8, + (unsigned long *)&gdt->address); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to alloc mem for gdt\n"); + goto fail; + } + + /* + * If the kernel isn't already loaded at the preferred load + * address, relocate it. + */ + if (hdr->pref_address != hdr->code32_start) { + unsigned long bzimage_addr = hdr->code32_start; + status = efi_relocate_kernel(sys_table, &bzimage_addr, + hdr->init_size, hdr->init_size, + hdr->pref_address, + hdr->kernel_alignment); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "efi_relocate_kernel() failed!\n"); + goto fail; + } + + hdr->pref_address = hdr->code32_start; + hdr->code32_start = bzimage_addr; + } + + status = exit_boot(boot_params, handle, is64); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "exit_boot() failed!\n"); + goto fail; + } + + memset((char *)gdt->address, 0x0, gdt->size); + desc = (struct desc_struct *)gdt->address; + + /* The first GDT is a dummy and the second is unused. */ + desc += 2; + + desc->limit0 = 0xffff; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ; + desc->s = DESC_TYPE_CODE_DATA; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0xf; + desc->avl = 0; + desc->l = 0; + desc->d = SEG_OP_SIZE_32BIT; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; + + desc++; + desc->limit0 = 0xffff; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_DATA | SEG_TYPE_READ_WRITE; + desc->s = DESC_TYPE_CODE_DATA; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0xf; + desc->avl = 0; + desc->l = 0; + desc->d = SEG_OP_SIZE_32BIT; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; + +#ifdef CONFIG_X86_64 + /* Task segment value */ + desc++; + desc->limit0 = 0x0000; + desc->base0 = 0x0000; + desc->base1 = 0x0000; + desc->type = SEG_TYPE_TSS; + desc->s = 0; + desc->dpl = 0; + desc->p = 1; + desc->limit = 0x0; + desc->avl = 0; + desc->l = 0; + desc->d = 0; + desc->g = SEG_GRANULARITY_4KB; + desc->base2 = 0x00; +#endif /* CONFIG_X86_64 */ + + asm volatile("cli"); + asm volatile ("lgdt %0" : : "m" (*gdt)); + + return boot_params; +fail: + efi_printk(sys_table, "efi_main() failed!\n"); + return NULL; +} diff --git a/kernel/arch/x86/boot/compressed/eboot.h b/kernel/arch/x86/boot/compressed/eboot.h new file mode 100644 index 000000000..d487e727f --- /dev/null +++ b/kernel/arch/x86/boot/compressed/eboot.h @@ -0,0 +1,106 @@ +#ifndef BOOT_COMPRESSED_EBOOT_H +#define BOOT_COMPRESSED_EBOOT_H + +#define SEG_TYPE_DATA (0 << 3) +#define SEG_TYPE_READ_WRITE (1 << 1) +#define SEG_TYPE_CODE (1 << 3) +#define SEG_TYPE_EXEC_READ (1 << 1) +#define SEG_TYPE_TSS ((1 << 3) | (1 << 0)) +#define SEG_OP_SIZE_32BIT (1 << 0) +#define SEG_GRANULARITY_4KB (1 << 0) + +#define DESC_TYPE_CODE_DATA (1 << 0) + +#define EFI_CONSOLE_OUT_DEVICE_GUID \ + EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \ + 0x3f, 0xc1, 0x4d) + +#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 +#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 +#define PIXEL_BIT_MASK 2 +#define PIXEL_BLT_ONLY 3 +#define PIXEL_FORMAT_MAX 4 + +struct efi_pixel_bitmask { + u32 red_mask; + u32 green_mask; + u32 blue_mask; + u32 reserved_mask; +}; + +struct efi_graphics_output_mode_info { + u32 version; + u32 horizontal_resolution; + u32 vertical_resolution; + int pixel_format; + struct efi_pixel_bitmask pixel_information; + u32 pixels_per_scan_line; +} __packed; + +struct efi_graphics_output_protocol_mode_32 { + u32 max_mode; + u32 mode; + u32 info; + u32 size_of_info; + u64 frame_buffer_base; + u32 frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_mode_64 { + u32 max_mode; + u32 mode; + u64 info; + u64 size_of_info; + u64 frame_buffer_base; + u64 frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_mode { + u32 max_mode; + u32 mode; + unsigned long info; + unsigned long size_of_info; + u64 frame_buffer_base; + unsigned long frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_32 { + u32 query_mode; + u32 set_mode; + u32 blt; + u32 mode; +}; + +struct efi_graphics_output_protocol_64 { + u64 query_mode; + u64 set_mode; + u64 blt; + u64 mode; +}; + +struct efi_graphics_output_protocol { + void *query_mode; + unsigned long set_mode; + unsigned long blt; + struct efi_graphics_output_protocol_mode *mode; +}; + +struct efi_uga_draw_protocol_32 { + u32 get_mode; + u32 set_mode; + u32 blt; +}; + +struct efi_uga_draw_protocol_64 { + u64 get_mode; + u64 set_mode; + u64 blt; +}; + +struct efi_uga_draw_protocol { + void *get_mode; + void *set_mode; + void *blt; +}; + +#endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/kernel/arch/x86/boot/compressed/efi_stub_32.S b/kernel/arch/x86/boot/compressed/efi_stub_32.S new file mode 100644 index 000000000..a53440e81 --- /dev/null +++ b/kernel/arch/x86/boot/compressed/efi_stub_32.S @@ -0,0 +1,86 @@ +/* + * EFI call stub for IA32. + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. Note that this implementation is different from the one in + * arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical + * mode at this point. + */ + +#include <linux/linkage.h> +#include <asm/page_types.h> + +/* + * efi_call_phys(void *, ...) is a function with variable parameters. + * All the callers of this function assure that all the parameters are 4-bytes. + */ + +/* + * In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save. + * So we'd better save all of them at the beginning of this function and restore + * at the end no matter how many we use, because we can not assure EFI runtime + * service functions will comply with gcc calling convention, too. + */ + +.text +ENTRY(efi_call_phys) + /* + * 0. The function can only be called in Linux kernel. So CS has been + * set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found + * the values of these registers are the same. And, the corresponding + * GDT entries are identical. So I will do nothing about segment reg + * and GDT, but change GDT base register in prelog and epilog. + */ + + /* + * 1. Because we haven't been relocated by this point we need to + * use relative addressing. + */ + call 1f +1: popl %edx + subl $1b, %edx + + /* + * 2. Now on the top of stack is the return + * address in the caller of efi_call_phys(), then parameter 1, + * parameter 2, ..., param n. To make things easy, we save the return + * address of efi_call_phys in a global variable. + */ + popl %ecx + movl %ecx, saved_return_addr(%edx) + /* get the function pointer into ECX*/ + popl %ecx + movl %ecx, efi_rt_function_ptr(%edx) + + /* + * 3. Call the physical function. + */ + call *%ecx + + /* + * 4. Balance the stack. And because EAX contain the return value, + * we'd better not clobber it. We need to calculate our address + * again because %ecx and %edx are not preserved across EFI function + * calls. + */ + call 1f +1: popl %edx + subl $1b, %edx + + movl efi_rt_function_ptr(%edx), %ecx + pushl %ecx + + /* + * 10. Push the saved return address onto the stack and return. + */ + movl saved_return_addr(%edx), %ecx + pushl %ecx + ret +ENDPROC(efi_call_phys) +.previous + +.data +saved_return_addr: + .long 0 +efi_rt_function_ptr: + .long 0 diff --git a/kernel/arch/x86/boot/compressed/efi_stub_64.S b/kernel/arch/x86/boot/compressed/efi_stub_64.S new file mode 100644 index 000000000..99494dff2 --- /dev/null +++ b/kernel/arch/x86/boot/compressed/efi_stub_64.S @@ -0,0 +1,5 @@ +#include <asm/segment.h> +#include <asm/msr.h> +#include <asm/processor-flags.h> + +#include "../../platform/efi/efi_stub_64.S" diff --git a/kernel/arch/x86/boot/compressed/efi_thunk_64.S b/kernel/arch/x86/boot/compressed/efi_thunk_64.S new file mode 100644 index 000000000..630384a4c --- /dev/null +++ b/kernel/arch/x86/boot/compressed/efi_thunk_64.S @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming + * + * Early support for invoking 32-bit EFI services from a 64-bit kernel. + * + * Because this thunking occurs before ExitBootServices() we have to + * restore the firmware's 32-bit GDT before we make EFI serivce calls, + * since the firmware's 32-bit IDT is still currently installed and it + * needs to be able to service interrupts. + * + * On the plus side, we don't have to worry about mangling 64-bit + * addresses into 32-bits because we're executing with an identify + * mapped pagetable and haven't transitioned to 64-bit virtual addresses + * yet. + */ + +#include <linux/linkage.h> +#include <asm/msr.h> +#include <asm/page_types.h> +#include <asm/processor-flags.h> +#include <asm/segment.h> + + .code64 + .text +ENTRY(efi64_thunk) + push %rbp + push %rbx + + subq $8, %rsp + leaq efi_exit32(%rip), %rax + movl %eax, 4(%rsp) + leaq efi_gdt64(%rip), %rax + movl %eax, (%rsp) + movl %eax, 2(%rax) /* Fixup the gdt base address */ + + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + + /* + * Convert x86-64 ABI params to i386 ABI + */ + subq $32, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movq %r8, %rsi + movl %esi, 0xc(%rsp) + movq %r9, %rsi + movl %esi, 0x10(%rsp) + + sgdt save_gdt(%rip) + + leaq 1f(%rip), %rbx + movq %rbx, func_rt_ptr(%rip) + + /* + * Switch to gdt with 32-bit segments. This is the firmware GDT + * that was installed when the kernel started executing. This + * pointer was saved at the EFI stub entry point in head_64.S. + */ + leaq efi32_boot_gdt(%rip), %rax + lgdt (%rax) + + pushq $__KERNEL_CS + leaq efi_enter32(%rip), %rax + pushq %rax + lretq + +1: addq $32, %rsp + + lgdt save_gdt(%rip) + + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + + /* + * Convert 32-bit status code into 64-bit. + */ + test %rax, %rax + jz 1f + movl %eax, %ecx + andl $0x0fffffff, %ecx + andl $0xf0000000, %eax + shl $32, %rax + or %rcx, %rax +1: + addq $8, %rsp + pop %rbx + pop %rbp + ret +ENDPROC(efi64_thunk) + +ENTRY(efi_exit32) + movq func_rt_ptr(%rip), %rax + push %rax + mov %rdi, %rax + ret +ENDPROC(efi_exit32) + + .code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +ENTRY(efi_enter32) + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + + /* Reload pgtables */ + movl %cr3, %eax + movl %eax, %cr3 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Disable long mode via EFER */ + movl $MSR_EFER, %ecx + rdmsr + btrl $_EFER_LME, %eax + wrmsr + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + /* + * Some firmware will return with interrupts enabled. Be sure to + * disable them before we switch GDTs. + */ + cli + + movl 56(%esp), %eax + movl %eax, 2(%eax) + lgdtl (%eax) + + movl %cr4, %eax + btsl $(X86_CR4_PAE_BIT), %eax + movl %eax, %cr4 + + movl %cr3, %eax + movl %eax, %cr3 + + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + xorl %eax, %eax + lldt %ax + + movl 60(%esp), %eax + pushl $__KERNEL_CS + pushl %eax + + /* Enable paging */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + lret +ENDPROC(efi_enter32) + + .data + .balign 8 + .global efi32_boot_gdt +efi32_boot_gdt: .word 0 + .quad 0 + +save_gdt: .word 0 + .quad 0 +func_rt_ptr: .quad 0 + + .global efi_gdt64 +efi_gdt64: + .word efi_gdt64_end - efi_gdt64 + .long 0 /* Filled out by user */ + .word 0 + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00af9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + .quad 0x0080890000000000 /* TS descriptor */ + .quad 0x0000000000000000 /* TS continued */ +efi_gdt64_end: diff --git a/kernel/arch/x86/boot/compressed/head_32.S b/kernel/arch/x86/boot/compressed/head_32.S new file mode 100644 index 000000000..8ef964ddc --- /dev/null +++ b/kernel/arch/x86/boot/compressed/head_32.S @@ -0,0 +1,248 @@ +/* + * linux/boot/head.S + * + * Copyright (C) 1991, 1992, 1993 Linus Torvalds + */ + +/* + * head.S contains the 32-bit startup code. + * + * NOTE!!! Startup happens at absolute address 0x00001000, which is also where + * the page directory will exist. The startup code will be overwritten by + * the page directory. [According to comments etc elsewhere on a compressed + * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] + * + * Page 0 is deliberately kept safe, since System Management Mode code in + * laptops may need to access the BIOS data stored there. This is also + * useful for future device drivers that either access the BIOS via VM86 + * mode. + */ + +/* + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + .text + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/page_types.h> +#include <asm/boot.h> +#include <asm/asm-offsets.h> +#include <asm/bootparam.h> + + __HEAD +ENTRY(startup_32) +#ifdef CONFIG_EFI_STUB + jmp preferred_addr + + /* + * We don't need the return address, so set up the stack so + * efi_main() can find its arguments. + */ +ENTRY(efi_pe_entry) + add $0x4, %esp + + call 1f +1: popl %esi + subl $1b, %esi + + popl %ecx + movl %ecx, efi32_config(%esi) /* Handle */ + popl %ecx + movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */ + + /* Relocate efi_config->call() */ + leal efi32_config(%esi), %eax + add %esi, 88(%eax) + pushl %eax + + call make_boot_params + cmpl $0, %eax + je fail + movl %esi, BP_code32_start(%eax) + popl %ecx + pushl %eax + pushl %ecx + jmp 2f /* Skip efi_config initialization */ + +ENTRY(efi32_stub_entry) + add $0x4, %esp + popl %ecx + popl %edx + + call 1f +1: popl %esi + subl $1b, %esi + + movl %ecx, efi32_config(%esi) /* Handle */ + movl %edx, efi32_config+8(%esi) /* EFI System table pointer */ + + /* Relocate efi_config->call() */ + leal efi32_config(%esi), %eax + add %esi, 88(%eax) + pushl %eax +2: + call efi_main + cmpl $0, %eax + movl %eax, %esi + jne 2f +fail: + /* EFI init failed, so hang. */ + hlt + jmp fail +2: + movl BP_code32_start(%esi), %eax + leal preferred_addr(%eax), %eax + jmp *%eax + +preferred_addr: +#endif + cld + /* + * Test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments + */ + testb $KEEP_SEGMENTS, BP_loadflags(%esi) + jnz 1f + + cli + movl $__BOOT_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %fs + movl %eax, %gs + movl %eax, %ss +1: + +/* + * Calculate the delta between where we were compiled to run + * at and where we were actually loaded at. This can only be done + * with a short local call on x86. Nothing else will tell us what + * address we are running at. The reserved chunk of the real-mode + * data at 0x1e4 (defined as a scratch field) are used as the stack + * for this calculation. Only 4 bytes are needed. + */ + leal (BP_scratch+4)(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp + +/* + * %ebp contains the address we are loaded at by the boot loader and %ebx + * contains the address where we should move the kernel image temporarily + * for safe in-place decompression. + */ + +#ifdef CONFIG_RELOCATABLE + movl %ebp, %ebx + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jge 1f +#endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: + + /* Target address to relocate to for decompression */ + addl $z_extract_offset, %ebx + + /* Set up the stack */ + leal boot_stack_end(%ebx), %esp + + /* Zero EFLAGS */ + pushl $0 + popfl + +/* + * Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. + */ + pushl %esi + leal (_bss-4)(%ebp), %esi + leal (_bss-4)(%ebx), %edi + movl $(_bss - startup_32), %ecx + shrl $2, %ecx + std + rep movsl + cld + popl %esi + +/* + * Jump to the relocated address. + */ + leal relocated(%ebx), %eax + jmp *%eax +ENDPROC(startup_32) + + .text +relocated: + +/* + * Clear BSS (stack is currently empty) + */ + xorl %eax, %eax + leal _bss(%ebx), %edi + leal _ebss(%ebx), %ecx + subl %edi, %ecx + shrl $2, %ecx + rep stosl + +/* + * Adjust our own GOT + */ + leal _got(%ebx), %edx + leal _egot(%ebx), %ecx +1: + cmpl %ecx, %edx + jae 2f + addl %ebx, (%edx) + addl $4, %edx + jmp 1b +2: + +/* + * Do the decompression, and jump to the new kernel.. + */ + /* push arguments for decompress_kernel: */ + pushl $z_run_size /* size of kernel with .bss and .brk */ + pushl $z_output_len /* decompressed length, end of relocs */ + leal z_extract_offset_negative(%ebx), %ebp + pushl %ebp /* output address */ + pushl $z_input_len /* input_len */ + leal input_data(%ebx), %eax + pushl %eax /* input_data */ + leal boot_heap(%ebx), %eax + pushl %eax /* heap area */ + pushl %esi /* real mode pointer */ + call decompress_kernel /* returns kernel location in %eax */ + addl $28, %esp + +/* + * Jump to the decompressed kernel. + */ + xorl %ebx, %ebx + jmp *%eax + +#ifdef CONFIG_EFI_STUB + .data +efi32_config: + .fill 11,8,0 + .long efi_call_phys + .long 0 + .byte 0 +#endif + +/* + * Stack and heap for uncompression + */ + .bss + .balign 4 +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/kernel/arch/x86/boot/compressed/head_64.S b/kernel/arch/x86/boot/compressed/head_64.S new file mode 100644 index 000000000..b0c0d16ef --- /dev/null +++ b/kernel/arch/x86/boot/compressed/head_64.S @@ -0,0 +1,480 @@ +/* + * linux/boot/head.S + * + * Copyright (C) 1991, 1992, 1993 Linus Torvalds + */ + +/* + * head.S contains the 32-bit startup code. + * + * NOTE!!! Startup happens at absolute address 0x00001000, which is also where + * the page directory will exist. The startup code will be overwritten by + * the page directory. [According to comments etc elsewhere on a compressed + * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] + * + * Page 0 is deliberately kept safe, since System Management Mode code in + * laptops may need to access the BIOS data stored there. This is also + * useful for future device drivers that either access the BIOS via VM86 + * mode. + */ + +/* + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + .code32 + .text + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/segment.h> +#include <asm/boot.h> +#include <asm/msr.h> +#include <asm/processor-flags.h> +#include <asm/asm-offsets.h> +#include <asm/bootparam.h> + + __HEAD + .code32 +ENTRY(startup_32) + /* + * 32bit entry is 0 and it is ABI so immutable! + * If we come here directly from a bootloader, + * kernel(text+data+bss+brk) ramdisk, zero_page, command line + * all need to be under the 4G limit. + */ + cld + /* + * Test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments + */ + testb $KEEP_SEGMENTS, BP_loadflags(%esi) + jnz 1f + + cli + movl $(__BOOT_DS), %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss +1: + +/* + * Calculate the delta between where we were compiled to run + * at and where we were actually loaded at. This can only be done + * with a short local call on x86. Nothing else will tell us what + * address we are running at. The reserved chunk of the real-mode + * data at 0x1e4 (defined as a scratch field) are used as the stack + * for this calculation. Only 4 bytes are needed. + */ + leal (BP_scratch+4)(%esi), %esp + call 1f +1: popl %ebp + subl $1b, %ebp + +/* setup a stack and make sure cpu supports long mode. */ + movl $boot_stack_end, %eax + addl %ebp, %eax + movl %eax, %esp + + call verify_cpu + testl %eax, %eax + jnz no_longmode + +/* + * Compute the delta between where we were compiled to run at + * and where the code will actually run at. + * + * %ebp contains the address we are loaded at by the boot loader and %ebx + * contains the address where we should move the kernel image temporarily + * for safe in-place decompression. + */ + +#ifdef CONFIG_RELOCATABLE + movl %ebp, %ebx + movl BP_kernel_alignment(%esi), %eax + decl %eax + addl %eax, %ebx + notl %eax + andl %eax, %ebx + cmpl $LOAD_PHYSICAL_ADDR, %ebx + jge 1f +#endif + movl $LOAD_PHYSICAL_ADDR, %ebx +1: + + /* Target address to relocate to for decompression */ + addl $z_extract_offset, %ebx + +/* + * Prepare for entering 64 bit mode + */ + + /* Load new GDT with the 64bit segments using 32bit descriptor */ + leal gdt(%ebp), %eax + movl %eax, gdt+2(%ebp) + lgdt gdt(%ebp) + + /* Enable PAE mode */ + movl %cr4, %eax + orl $X86_CR4_PAE, %eax + movl %eax, %cr4 + + /* + * Build early 4G boot pagetable + */ + /* Initialize Page tables to 0 */ + leal pgtable(%ebx), %edi + xorl %eax, %eax + movl $((4096*6)/4), %ecx + rep stosl + + /* Build Level 4 */ + leal pgtable + 0(%ebx), %edi + leal 0x1007 (%edi), %eax + movl %eax, 0(%edi) + + /* Build Level 3 */ + leal pgtable + 0x1000(%ebx), %edi + leal 0x1007(%edi), %eax + movl $4, %ecx +1: movl %eax, 0x00(%edi) + addl $0x00001000, %eax + addl $8, %edi + decl %ecx + jnz 1b + + /* Build Level 2 */ + leal pgtable + 0x2000(%ebx), %edi + movl $0x00000183, %eax + movl $2048, %ecx +1: movl %eax, 0(%edi) + addl $0x00200000, %eax + addl $8, %edi + decl %ecx + jnz 1b + + /* Enable the boot page tables */ + leal pgtable(%ebx), %eax + movl %eax, %cr3 + + /* Enable Long mode in EFER (Extended Feature Enable Register) */ + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + /* After gdt is loaded */ + xorl %eax, %eax + lldt %ax + movl $__BOOT_TSS, %eax + ltr %ax + + /* + * Setup for the jump to 64bit mode + * + * When the jump is performend we will be in long mode but + * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1 + * (and in turn EFER.LMA = 1). To jump into 64bit mode we use + * the new gdt/idt that has __KERNEL_CS with CS.L = 1. + * We place all of the values on our mini stack so lret can + * used to perform that far jump. + */ + pushl $__KERNEL_CS + leal startup_64(%ebp), %eax +#ifdef CONFIG_EFI_MIXED + movl efi32_config(%ebp), %ebx + cmp $0, %ebx + jz 1f + leal handover_entry(%ebp), %eax +1: +#endif + pushl %eax + + /* Enter paged protected Mode, activating Long Mode */ + movl $(X86_CR0_PG | X86_CR0_PE), %eax /* Enable Paging and Protected mode */ + movl %eax, %cr0 + + /* Jump from 32bit compatibility mode into 64bit mode. */ + lret +ENDPROC(startup_32) + +#ifdef CONFIG_EFI_MIXED + .org 0x190 +ENTRY(efi32_stub_entry) + add $0x4, %esp /* Discard return address */ + popl %ecx + popl %edx + popl %esi + + leal (BP_scratch+4)(%esi), %esp + call 1f +1: pop %ebp + subl $1b, %ebp + + movl %ecx, efi32_config(%ebp) + movl %edx, efi32_config+8(%ebp) + sgdtl efi32_boot_gdt(%ebp) + + leal efi32_config(%ebp), %eax + movl %eax, efi_config(%ebp) + + jmp startup_32 +ENDPROC(efi32_stub_entry) +#endif + + .code64 + .org 0x200 +ENTRY(startup_64) + /* + * 64bit entry is 0x200 and it is ABI so immutable! + * We come here either from startup_32 or directly from a + * 64bit bootloader. + * If we come here from a bootloader, kernel(text+data+bss+brk), + * ramdisk, zero_page, command line could be above 4G. + * We depend on an identity mapped page table being provided + * that maps our entire kernel(text+data+bss+brk), zero page + * and command line. + */ +#ifdef CONFIG_EFI_STUB + /* + * The entry point for the PE/COFF executable is efi_pe_entry, so + * only legacy boot loaders will execute this jmp. + */ + jmp preferred_addr + +ENTRY(efi_pe_entry) + movq %rcx, efi64_config(%rip) /* Handle */ + movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */ + + leaq efi64_config(%rip), %rax + movq %rax, efi_config(%rip) + + call 1f +1: popq %rbp + subq $1b, %rbp + + /* + * Relocate efi_config->call(). + */ + addq %rbp, efi64_config+88(%rip) + + movq %rax, %rdi + call make_boot_params + cmpq $0,%rax + je fail + mov %rax, %rsi + leaq startup_32(%rip), %rax + movl %eax, BP_code32_start(%rsi) + jmp 2f /* Skip the relocation */ + +handover_entry: + call 1f +1: popq %rbp + subq $1b, %rbp + + /* + * Relocate efi_config->call(). + */ + movq efi_config(%rip), %rax + addq %rbp, 88(%rax) +2: + movq efi_config(%rip), %rdi + call efi_main + movq %rax,%rsi + cmpq $0,%rax + jne 2f +fail: + /* EFI init failed, so hang. */ + hlt + jmp fail +2: + movl BP_code32_start(%esi), %eax + leaq preferred_addr(%rax), %rax + jmp *%rax + +preferred_addr: +#endif + + /* Setup data segments. */ + xorl %eax, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + movl %eax, %fs + movl %eax, %gs + + /* + * Compute the decompressed kernel start address. It is where + * we were loaded at aligned to a 2M boundary. %rbp contains the + * decompressed kernel start address. + * + * If it is a relocatable kernel then decompress and run the kernel + * from load address aligned to 2MB addr, otherwise decompress and + * run the kernel from LOAD_PHYSICAL_ADDR + * + * We cannot rely on the calculation done in 32-bit mode, since we + * may have been invoked via the 64-bit entry point. + */ + + /* Start with the delta to where the kernel will run at. */ +#ifdef CONFIG_RELOCATABLE + leaq startup_32(%rip) /* - $startup_32 */, %rbp + movl BP_kernel_alignment(%rsi), %eax + decl %eax + addq %rax, %rbp + notq %rax + andq %rax, %rbp + cmpq $LOAD_PHYSICAL_ADDR, %rbp + jge 1f +#endif + movq $LOAD_PHYSICAL_ADDR, %rbp +1: + + /* Target address to relocate to for decompression */ + leaq z_extract_offset(%rbp), %rbx + + /* Set up the stack */ + leaq boot_stack_end(%rbx), %rsp + + /* Zero EFLAGS */ + pushq $0 + popfq + +/* + * Copy the compressed kernel to the end of our buffer + * where decompression in place becomes safe. + */ + pushq %rsi + leaq (_bss-8)(%rip), %rsi + leaq (_bss-8)(%rbx), %rdi + movq $_bss /* - $startup_32 */, %rcx + shrq $3, %rcx + std + rep movsq + cld + popq %rsi + +/* + * Jump to the relocated address. + */ + leaq relocated(%rbx), %rax + jmp *%rax + +#ifdef CONFIG_EFI_STUB + .org 0x390 +ENTRY(efi64_stub_entry) + movq %rdi, efi64_config(%rip) /* Handle */ + movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */ + + leaq efi64_config(%rip), %rax + movq %rax, efi_config(%rip) + + movq %rdx, %rsi + jmp handover_entry +ENDPROC(efi64_stub_entry) +#endif + + .text +relocated: + +/* + * Clear BSS (stack is currently empty) + */ + xorl %eax, %eax + leaq _bss(%rip), %rdi + leaq _ebss(%rip), %rcx + subq %rdi, %rcx + shrq $3, %rcx + rep stosq + +/* + * Adjust our own GOT + */ + leaq _got(%rip), %rdx + leaq _egot(%rip), %rcx +1: + cmpq %rcx, %rdx + jae 2f + addq %rbx, (%rdx) + addq $8, %rdx + jmp 1b +2: + +/* + * Do the decompression, and jump to the new kernel.. + */ + pushq %rsi /* Save the real mode argument */ + movq $z_run_size, %r9 /* size of kernel with .bss and .brk */ + pushq %r9 + movq %rsi, %rdi /* real mode address */ + leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ + leaq input_data(%rip), %rdx /* input_data */ + movl $z_input_len, %ecx /* input_len */ + movq %rbp, %r8 /* output target address */ + movq $z_output_len, %r9 /* decompressed length, end of relocs */ + call decompress_kernel /* returns kernel location in %rax */ + popq %r9 + popq %rsi + +/* + * Jump to the decompressed kernel. + */ + jmp *%rax + + .code32 +no_longmode: + /* This isn't an x86-64 CPU so hang */ +1: + hlt + jmp 1b + +#include "../../kernel/verify_cpu.S" + + .data +gdt: + .word gdt_end - gdt + .long gdt + .word 0 + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00af9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + .quad 0x0080890000000000 /* TS descriptor */ + .quad 0x0000000000000000 /* TS continued */ +gdt_end: + +#ifdef CONFIG_EFI_STUB +efi_config: + .quad 0 + +#ifdef CONFIG_EFI_MIXED + .global efi32_config +efi32_config: + .fill 11,8,0 + .quad efi64_thunk + .byte 0 +#endif + + .global efi64_config +efi64_config: + .fill 11,8,0 + .quad efi_call + .byte 1 +#endif /* CONFIG_EFI_STUB */ + +/* + * Stack and heap for uncompression + */ + .bss + .balign 4 +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: + +/* + * Space for page tables (not in .bss so not zeroed) + */ + .section ".pgtable","a",@nobits + .balign 4096 +pgtable: + .fill 6*4096, 1, 0 diff --git a/kernel/arch/x86/boot/compressed/misc.c b/kernel/arch/x86/boot/compressed/misc.c new file mode 100644 index 000000000..a107b935e --- /dev/null +++ b/kernel/arch/x86/boot/compressed/misc.c @@ -0,0 +1,437 @@ +/* + * misc.c + * + * This is a collection of several routines from gzip-1.0.3 + * adapted for Linux. + * + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + * puts by Nick Holloway 1993, better puts by Martin Mares 1995 + * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 + */ + +#include "misc.h" +#include "../string.h" + +/* WARNING!! + * This code is compiled with -fPIC and it is relocated dynamically + * at run time, but no relocation processing is performed. + * This means that it is not safe to place pointers in static structures. + */ + +/* + * Getting to provable safe in place decompression is hard. + * Worst case behaviours need to be analyzed. + * Background information: + * + * The file layout is: + * magic[2] + * method[1] + * flags[1] + * timestamp[4] + * extraflags[1] + * os[1] + * compressed data blocks[N] + * crc[4] orig_len[4] + * + * resulting in 18 bytes of non compressed data overhead. + * + * Files divided into blocks + * 1 bit (last block flag) + * 2 bits (block type) + * + * 1 block occurs every 32K -1 bytes or when there 50% compression + * has been achieved. The smallest block type encoding is always used. + * + * stored: + * 32 bits length in bytes. + * + * fixed: + * magic fixed tree. + * symbols. + * + * dynamic: + * dynamic tree encoding. + * symbols. + * + * + * The buffer for decompression in place is the length of the + * uncompressed data, plus a small amount extra to keep the algorithm safe. + * The compressed data is placed at the end of the buffer. The output + * pointer is placed at the start of the buffer and the input pointer + * is placed where the compressed data starts. Problems will occur + * when the output pointer overruns the input pointer. + * + * The output pointer can only overrun the input pointer if the input + * pointer is moving faster than the output pointer. A condition only + * triggered by data whose compressed form is larger than the uncompressed + * form. + * + * The worst case at the block level is a growth of the compressed data + * of 5 bytes per 32767 bytes. + * + * The worst case internal to a compressed block is very hard to figure. + * The worst case can at least be boundined by having one bit that represents + * 32764 bytes and then all of the rest of the bytes representing the very + * very last byte. + * + * All of which is enough to compute an amount of extra data that is required + * to be safe. To avoid problems at the block level allocating 5 extra bytes + * per 32767 bytes of data is sufficient. To avoind problems internal to a + * block adding an extra 32767 bytes (the worst case uncompressed block size) + * is sufficient, to ensure that in the worst case the decompressed data for + * block will stop the byte before the compressed data for a block begins. + * To avoid problems with the compressed data's meta information an extra 18 + * bytes are needed. Leading to the formula: + * + * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. + * + * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. + * Adding 32768 instead of 32767 just makes for round numbers. + * Adding the decompressor_size is necessary as it musht live after all + * of the data as well. Last I measured the decompressor is about 14K. + * 10K of actual data and 4K of bss. + * + */ + +/* + * gzip declarations + */ +#define STATIC static + +#undef memcpy + +/* + * Use a normal definition of memset() from string.c. There are already + * included header files which expect a definition of memset() and by + * the time we define memset macro, it is too late. + */ +#undef memset +#define memzero(s, n) memset((s), 0, (n)) + + +static void error(char *m); + +/* + * This is set up by the setup-routine at boot-time + */ +struct boot_params *real_mode; /* Pointer to real-mode data */ + +memptr free_mem_ptr; +memptr free_mem_end_ptr; + +static char *vidmem; +static int vidport; +static int lines, cols; + +#ifdef CONFIG_KERNEL_GZIP +#include "../../../../lib/decompress_inflate.c" +#endif + +#ifdef CONFIG_KERNEL_BZIP2 +#include "../../../../lib/decompress_bunzip2.c" +#endif + +#ifdef CONFIG_KERNEL_LZMA +#include "../../../../lib/decompress_unlzma.c" +#endif + +#ifdef CONFIG_KERNEL_XZ +#include "../../../../lib/decompress_unxz.c" +#endif + +#ifdef CONFIG_KERNEL_LZO +#include "../../../../lib/decompress_unlzo.c" +#endif + +#ifdef CONFIG_KERNEL_LZ4 +#include "../../../../lib/decompress_unlz4.c" +#endif + +static void scroll(void) +{ + int i; + + memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2); + for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2) + vidmem[i] = ' '; +} + +#define XMTRDY 0x20 + +#define TXR 0 /* Transmit register (WRITE) */ +#define LSR 5 /* Line Status */ +static void serial_putchar(int ch) +{ + unsigned timeout = 0xffff; + + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) + cpu_relax(); + + outb(ch, early_serial_base + TXR); +} + +void __putstr(const char *s) +{ + int x, y, pos; + char c; + + if (early_serial_base) { + const char *str = s; + while (*str) { + if (*str == '\n') + serial_putchar('\r'); + serial_putchar(*str++); + } + } + + if (real_mode->screen_info.orig_video_mode == 0 && + lines == 0 && cols == 0) + return; + + x = real_mode->screen_info.orig_x; + y = real_mode->screen_info.orig_y; + + while ((c = *s++) != '\0') { + if (c == '\n') { + x = 0; + if (++y >= lines) { + scroll(); + y--; + } + } else { + vidmem[(x + cols * y) * 2] = c; + if (++x >= cols) { + x = 0; + if (++y >= lines) { + scroll(); + y--; + } + } + } + } + + real_mode->screen_info.orig_x = x; + real_mode->screen_info.orig_y = y; + + pos = (x + cols * y) * 2; /* Update cursor position */ + outb(14, vidport); + outb(0xff & (pos >> 9), vidport+1); + outb(15, vidport); + outb(0xff & (pos >> 1), vidport+1); +} + +static void error(char *x) +{ + error_putstr("\n\n"); + error_putstr(x); + error_putstr("\n\n -- System halted"); + + while (1) + asm("hlt"); +} + +#if CONFIG_X86_NEED_RELOCS +static void handle_relocations(void *output, unsigned long output_len) +{ + int *reloc; + unsigned long delta, map, ptr; + unsigned long min_addr = (unsigned long)output; + unsigned long max_addr = min_addr + output_len; + + /* + * Calculate the delta between where vmlinux was linked to load + * and where it was actually loaded. + */ + delta = min_addr - LOAD_PHYSICAL_ADDR; + if (!delta) { + debug_putstr("No relocation needed... "); + return; + } + debug_putstr("Performing relocations... "); + + /* + * The kernel contains a table of relocation addresses. Those + * addresses have the final load address of the kernel in virtual + * memory. We are currently working in the self map. So we need to + * create an adjustment for kernel memory addresses to the self map. + * This will involve subtracting out the base address of the kernel. + */ + map = delta - __START_KERNEL_map; + + /* + * Process relocations: 32 bit relocations first then 64 bit after. + * Three sets of binary relocations are added to the end of the kernel + * before compression. Each relocation table entry is the kernel + * address of the location which needs to be updated stored as a + * 32-bit value which is sign extended to 64 bits. + * + * Format is: + * + * kernel bits... + * 0 - zero terminator for 64 bit relocations + * 64 bit relocation repeated + * 0 - zero terminator for inverse 32 bit relocations + * 32 bit inverse relocation repeated + * 0 - zero terminator for 32 bit relocations + * 32 bit relocation repeated + * + * So we work backwards from the end of the decompressed image. + */ + for (reloc = output + output_len - sizeof(*reloc); *reloc; reloc--) { + int extended = *reloc; + extended += map; + + ptr = (unsigned long)extended; + if (ptr < min_addr || ptr > max_addr) + error("32-bit relocation outside of kernel!\n"); + + *(uint32_t *)ptr += delta; + } +#ifdef CONFIG_X86_64 + while (*--reloc) { + long extended = *reloc; + extended += map; + + ptr = (unsigned long)extended; + if (ptr < min_addr || ptr > max_addr) + error("inverse 32-bit relocation outside of kernel!\n"); + + *(int32_t *)ptr -= delta; + } + for (reloc--; *reloc; reloc--) { + long extended = *reloc; + extended += map; + + ptr = (unsigned long)extended; + if (ptr < min_addr || ptr > max_addr) + error("64-bit relocation outside of kernel!\n"); + + *(uint64_t *)ptr += delta; + } +#endif +} +#else +static inline void handle_relocations(void *output, unsigned long output_len) +{ } +#endif + +static void parse_elf(void *output) +{ +#ifdef CONFIG_X86_64 + Elf64_Ehdr ehdr; + Elf64_Phdr *phdrs, *phdr; +#else + Elf32_Ehdr ehdr; + Elf32_Phdr *phdrs, *phdr; +#endif + void *dest; + int i; + + memcpy(&ehdr, output, sizeof(ehdr)); + if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || + ehdr.e_ident[EI_MAG1] != ELFMAG1 || + ehdr.e_ident[EI_MAG2] != ELFMAG2 || + ehdr.e_ident[EI_MAG3] != ELFMAG3) { + error("Kernel is not a valid ELF file"); + return; + } + + debug_putstr("Parsing ELF... "); + + phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum); + if (!phdrs) + error("Failed to allocate space for phdrs"); + + memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum); + + for (i = 0; i < ehdr.e_phnum; i++) { + phdr = &phdrs[i]; + + switch (phdr->p_type) { + case PT_LOAD: +#ifdef CONFIG_RELOCATABLE + dest = output; + dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); +#else + dest = (void *)(phdr->p_paddr); +#endif + memcpy(dest, + output + phdr->p_offset, + phdr->p_filesz); + break; + default: /* Ignore other PT_* */ break; + } + } + + free(phdrs); +} + +asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap, + unsigned char *input_data, + unsigned long input_len, + unsigned char *output, + unsigned long output_len, + unsigned long run_size) +{ + unsigned char *output_orig = output; + + real_mode = rmode; + + /* Clear it for solely in-kernel use */ + real_mode->hdr.loadflags &= ~KASLR_FLAG; + + sanitize_boot_params(real_mode); + + if (real_mode->screen_info.orig_video_mode == 7) { + vidmem = (char *) 0xb0000; + vidport = 0x3b4; + } else { + vidmem = (char *) 0xb8000; + vidport = 0x3d4; + } + + lines = real_mode->screen_info.orig_video_lines; + cols = real_mode->screen_info.orig_video_cols; + + console_init(); + debug_putstr("early console in decompress_kernel\n"); + + free_mem_ptr = heap; /* Heap */ + free_mem_end_ptr = heap + BOOT_HEAP_SIZE; + + /* + * The memory hole needed for the kernel is the larger of either + * the entire decompressed kernel plus relocation table, or the + * entire decompressed kernel plus .bss and .brk sections. + */ + output = choose_kernel_location(real_mode, input_data, input_len, output, + output_len > run_size ? output_len + : run_size); + + /* Validate memory location choices. */ + if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) + error("Destination address inappropriately aligned"); +#ifdef CONFIG_X86_64 + if (heap > 0x3fffffffffffUL) + error("Destination address too large"); +#else + if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) + error("Destination address too large"); +#endif +#ifndef CONFIG_RELOCATABLE + if ((unsigned long)output != LOAD_PHYSICAL_ADDR) + error("Wrong destination address"); +#endif + + debug_putstr("\nDecompressing Linux... "); + decompress(input_data, input_len, NULL, NULL, output, NULL, error); + parse_elf(output); + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if kASLR has chosen a different load address. + */ + if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) + handle_relocations(output, output_len); + debug_putstr("done.\nBooting the kernel.\n"); + return output; +} diff --git a/kernel/arch/x86/boot/compressed/misc.h b/kernel/arch/x86/boot/compressed/misc.h new file mode 100644 index 000000000..805d25ca5 --- /dev/null +++ b/kernel/arch/x86/boot/compressed/misc.h @@ -0,0 +1,88 @@ +#ifndef BOOT_COMPRESSED_MISC_H +#define BOOT_COMPRESSED_MISC_H + +/* + * Special hack: we have to be careful, because no indirections are allowed here, + * and paravirt_ops is a kind of one. As it will only run in baremetal anyway, + * we just keep it from happening. (This list needs to be extended when new + * paravirt and debugging variants are added.) + */ +#undef CONFIG_PARAVIRT +#undef CONFIG_PARAVIRT_SPINLOCKS +#undef CONFIG_KASAN + +#include <linux/linkage.h> +#include <linux/screen_info.h> +#include <linux/elf.h> +#include <linux/io.h> +#include <asm/page.h> +#include <asm/boot.h> +#include <asm/bootparam.h> +#include <asm/bootparam_utils.h> + +#define BOOT_BOOT_H +#include "../ctype.h" + +#ifdef CONFIG_X86_64 +#define memptr long +#else +#define memptr unsigned +#endif + +/* misc.c */ +extern memptr free_mem_ptr; +extern memptr free_mem_end_ptr; +extern struct boot_params *real_mode; /* Pointer to real-mode data */ +void __putstr(const char *s); +#define error_putstr(__x) __putstr(__x) + +#ifdef CONFIG_X86_VERBOSE_BOOTUP + +#define debug_putstr(__x) __putstr(__x) + +#else + +static inline void debug_putstr(const char *s) +{ } + +#endif + +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE +/* cmdline.c */ +int cmdline_find_option(const char *option, char *buffer, int bufsize); +int cmdline_find_option_bool(const char *option); +#endif + + +#if CONFIG_RANDOMIZE_BASE +/* aslr.c */ +unsigned char *choose_kernel_location(struct boot_params *boot_params, + unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size); +/* cpuflags.c */ +bool has_cpuflag(int flag); +#else +static inline +unsigned char *choose_kernel_location(struct boot_params *boot_params, + unsigned char *input, + unsigned long input_size, + unsigned char *output, + unsigned long output_size) +{ + return output; +} +#endif + +#ifdef CONFIG_EARLY_PRINTK +/* early_serial_console.c */ +extern int early_serial_base; +void console_init(void); +#else +static const int early_serial_base; +static inline void console_init(void) +{ } +#endif + +#endif diff --git a/kernel/arch/x86/boot/compressed/mkpiggy.c b/kernel/arch/x86/boot/compressed/mkpiggy.c new file mode 100644 index 000000000..d8222f213 --- /dev/null +++ b/kernel/arch/x86/boot/compressed/mkpiggy.c @@ -0,0 +1,104 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright (C) 2009 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * H. Peter Anvin <hpa@linux.intel.com> + * + * ----------------------------------------------------------------------- */ + +/* + * Compute the desired load offset from a compressed program; outputs + * a small assembly wrapper with the appropriate symbols defined. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <tools/le_byteshift.h> + +int main(int argc, char *argv[]) +{ + uint32_t olen; + long ilen; + unsigned long offs; + unsigned long run_size; + FILE *f = NULL; + int retval = 1; + + if (argc < 3) { + fprintf(stderr, "Usage: %s compressed_file run_size\n", + argv[0]); + goto bail; + } + + /* Get the information for the compressed kernel image first */ + + f = fopen(argv[1], "r"); + if (!f) { + perror(argv[1]); + goto bail; + } + + + if (fseek(f, -4L, SEEK_END)) { + perror(argv[1]); + } + + if (fread(&olen, sizeof(olen), 1, f) != 1) { + perror(argv[1]); + goto bail; + } + + ilen = ftell(f); + olen = get_unaligned_le32(&olen); + + /* + * Now we have the input (compressed) and output (uncompressed) + * sizes, compute the necessary decompression offset... + */ + + offs = (olen > ilen) ? olen - ilen : 0; + offs += olen >> 12; /* Add 8 bytes for each 32K block */ + offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */ + offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ + run_size = atoi(argv[2]); + + printf(".section \".rodata..compressed\",\"a\",@progbits\n"); + printf(".globl z_input_len\n"); + printf("z_input_len = %lu\n", ilen); + printf(".globl z_output_len\n"); + printf("z_output_len = %lu\n", (unsigned long)olen); + printf(".globl z_extract_offset\n"); + printf("z_extract_offset = 0x%lx\n", offs); + /* z_extract_offset_negative allows simplification of head_32.S */ + printf(".globl z_extract_offset_negative\n"); + printf("z_extract_offset_negative = -0x%lx\n", offs); + printf(".globl z_run_size\n"); + printf("z_run_size = %lu\n", run_size); + + printf(".globl input_data, input_data_end\n"); + printf("input_data:\n"); + printf(".incbin \"%s\"\n", argv[1]); + printf("input_data_end:\n"); + + retval = 0; +bail: + if (f) + fclose(f); + return retval; +} diff --git a/kernel/arch/x86/boot/compressed/string.c b/kernel/arch/x86/boot/compressed/string.c new file mode 100644 index 000000000..00e788be1 --- /dev/null +++ b/kernel/arch/x86/boot/compressed/string.c @@ -0,0 +1,41 @@ +#include "../string.c" + +#ifdef CONFIG_X86_32 +void *memcpy(void *dest, const void *src, size_t n) +{ + int d0, d1, d2; + asm volatile( + "rep ; movsl\n\t" + "movl %4,%%ecx\n\t" + "rep ; movsb\n\t" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src) + : "memory"); + + return dest; +} +#else +void *memcpy(void *dest, const void *src, size_t n) +{ + long d0, d1, d2; + asm volatile( + "rep ; movsq\n\t" + "movq %4,%%rcx\n\t" + "rep ; movsb\n\t" + : "=&c" (d0), "=&D" (d1), "=&S" (d2) + : "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src) + : "memory"); + + return dest; +} +#endif + +void *memset(void *s, int c, size_t n) +{ + int i; + char *ss = s; + + for (i = 0; i < n; i++) + ss[i] = c; + return s; +} diff --git a/kernel/arch/x86/boot/compressed/vmlinux.lds.S b/kernel/arch/x86/boot/compressed/vmlinux.lds.S new file mode 100644 index 000000000..34d047c98 --- /dev/null +++ b/kernel/arch/x86/boot/compressed/vmlinux.lds.S @@ -0,0 +1,74 @@ +#include <asm-generic/vmlinux.lds.h> + +OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) + +#undef i386 + +#include <asm/cache.h> +#include <asm/page_types.h> + +#ifdef CONFIG_X86_64 +OUTPUT_ARCH(i386:x86-64) +ENTRY(startup_64) +#else +OUTPUT_ARCH(i386) +ENTRY(startup_32) +#endif + +SECTIONS +{ + /* Be careful parts of head_64.S assume startup_32 is at + * address 0. + */ + . = 0; + .head.text : { + _head = . ; + HEAD_TEXT + _ehead = . ; + } + .rodata..compressed : { + *(.rodata..compressed) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .got : { + _got = .; + KEEP(*(.got.plt)) + KEEP(*(.got)) + _egot = .; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + . = ALIGN(L1_CACHE_BYTES); + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(8); /* For convenience during zeroing */ + _ebss = .; + } +#ifdef CONFIG_X86_64 + . = ALIGN(PAGE_SIZE); + .pgtable : { + _pgtable = . ; + *(.pgtable) + _epgtable = . ; + } +#endif + _end = .; +} |