diff options
Diffstat (limited to 'kernel/tools/testing')
185 files changed, 13921 insertions, 278 deletions
diff --git a/kernel/tools/testing/nvdimm/Kbuild b/kernel/tools/testing/nvdimm/Kbuild new file mode 100644 index 000000000..38b00ecb2 --- /dev/null +++ b/kernel/tools/testing/nvdimm/Kbuild @@ -0,0 +1,52 @@ +ldflags-y += --wrap=ioremap_wc +ldflags-y += --wrap=memremap +ldflags-y += --wrap=devm_ioremap_nocache +ldflags-y += --wrap=devm_memremap +ldflags-y += --wrap=devm_memunmap +ldflags-y += --wrap=ioremap_nocache +ldflags-y += --wrap=iounmap +ldflags-y += --wrap=memunmap +ldflags-y += --wrap=__devm_request_region +ldflags-y += --wrap=__request_region +ldflags-y += --wrap=__release_region + +DRIVERS := ../../../drivers +NVDIMM_SRC := $(DRIVERS)/nvdimm +ACPI_SRC := $(DRIVERS)/acpi + +obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o +obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o +obj-$(CONFIG_ND_BTT) += nd_btt.o +obj-$(CONFIG_ND_BLK) += nd_blk.o +obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o +obj-$(CONFIG_ACPI_NFIT) += nfit.o + +nfit-y := $(ACPI_SRC)/nfit.o +nfit-y += config_check.o + +nd_pmem-y := $(NVDIMM_SRC)/pmem.o +nd_pmem-y += config_check.o + +nd_btt-y := $(NVDIMM_SRC)/btt.o +nd_btt-y += config_check.o + +nd_blk-y := $(NVDIMM_SRC)/blk.o +nd_blk-y += config_check.o + +nd_e820-y := $(NVDIMM_SRC)/e820.o +nd_e820-y += config_check.o + +libnvdimm-y := $(NVDIMM_SRC)/core.o +libnvdimm-y += $(NVDIMM_SRC)/bus.o +libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o +libnvdimm-y += $(NVDIMM_SRC)/dimm.o +libnvdimm-y += $(NVDIMM_SRC)/region_devs.o +libnvdimm-y += $(NVDIMM_SRC)/region.o +libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o +libnvdimm-y += $(NVDIMM_SRC)/label.o +libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o +libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o +libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o +libnvdimm-y += config_check.o + +obj-m += test/ diff --git a/kernel/tools/testing/nvdimm/Makefile b/kernel/tools/testing/nvdimm/Makefile new file mode 100644 index 000000000..3dfe024b4 --- /dev/null +++ b/kernel/tools/testing/nvdimm/Makefile @@ -0,0 +1,7 @@ +KDIR ?= ../../../ + +default: + $(MAKE) -C $(KDIR) M=$$PWD + +install: default + $(MAKE) -C $(KDIR) M=$$PWD modules_install diff --git a/kernel/tools/testing/nvdimm/config_check.c b/kernel/tools/testing/nvdimm/config_check.c new file mode 100644 index 000000000..f2c761555 --- /dev/null +++ b/kernel/tools/testing/nvdimm/config_check.c @@ -0,0 +1,15 @@ +#include <linux/kconfig.h> +#include <linux/bug.h> + +void check(void) +{ + /* + * These kconfig symbols must be set to "m" for nfit_test to + * load and operate. + */ + BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); +} diff --git a/kernel/tools/testing/nvdimm/test/Kbuild b/kernel/tools/testing/nvdimm/test/Kbuild new file mode 100644 index 000000000..924106497 --- /dev/null +++ b/kernel/tools/testing/nvdimm/test/Kbuild @@ -0,0 +1,8 @@ +ccflags-y := -I$(src)/../../../../drivers/nvdimm/ +ccflags-y += -I$(src)/../../../../drivers/acpi/ + +obj-m += nfit_test.o +obj-m += nfit_test_iomap.o + +nfit_test-y := nfit.o +nfit_test_iomap-y := iomap.o diff --git a/kernel/tools/testing/nvdimm/test/iomap.c b/kernel/tools/testing/nvdimm/test/iomap.c new file mode 100644 index 000000000..b7251314b --- /dev/null +++ b/kernel/tools/testing/nvdimm/test/iomap.c @@ -0,0 +1,239 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/rculist.h> +#include <linux/export.h> +#include <linux/ioport.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/io.h> +#include "nfit_test.h" + +static LIST_HEAD(iomap_head); + +static struct iomap_ops { + nfit_test_lookup_fn nfit_test_lookup; + struct list_head list; +} iomap_ops = { + .list = LIST_HEAD_INIT(iomap_ops.list), +}; + +void nfit_test_setup(nfit_test_lookup_fn lookup) +{ + iomap_ops.nfit_test_lookup = lookup; + list_add_rcu(&iomap_ops.list, &iomap_head); +} +EXPORT_SYMBOL(nfit_test_setup); + +void nfit_test_teardown(void) +{ + list_del_rcu(&iomap_ops.list); + synchronize_rcu(); +} +EXPORT_SYMBOL(nfit_test_teardown); + +static struct nfit_test_resource *get_nfit_res(resource_size_t resource) +{ + struct iomap_ops *ops; + + ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list); + if (ops) + return ops->nfit_test_lookup(resource); + return NULL; +} + +void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, + void __iomem *(*fallback_fn)(resource_size_t, unsigned long)) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return (void __iomem *) nfit_res->buf + offset + - nfit_res->res->start; + return fallback_fn(offset, size); +} + +void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, + resource_size_t offset, unsigned long size) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return (void __iomem *) nfit_res->buf + offset + - nfit_res->res->start; + return devm_ioremap_nocache(dev, offset, size); +} +EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); + +void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return nfit_res->buf + offset - nfit_res->res->start; + return devm_memremap(dev, offset, size, flags); +} +EXPORT_SYMBOL(__wrap_devm_memremap); + +void *__wrap_memremap(resource_size_t offset, size_t size, + unsigned long flags) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return nfit_res->buf + offset - nfit_res->res->start; + return memremap(offset, size, flags); +} +EXPORT_SYMBOL(__wrap_memremap); + +void __wrap_devm_memunmap(struct device *dev, void *addr) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res((unsigned long) addr); + rcu_read_unlock(); + if (nfit_res) + return; + return devm_memunmap(dev, addr); +} +EXPORT_SYMBOL(__wrap_devm_memunmap); + +void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_nocache); +} +EXPORT_SYMBOL(__wrap_ioremap_nocache); + +void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wc); +} +EXPORT_SYMBOL(__wrap_ioremap_wc); + +void __wrap_iounmap(volatile void __iomem *addr) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res((unsigned long) addr); + rcu_read_unlock(); + if (nfit_res) + return; + return iounmap(addr); +} +EXPORT_SYMBOL(__wrap_iounmap); + +void __wrap_memunmap(void *addr) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res((unsigned long) addr); + rcu_read_unlock(); + if (nfit_res) + return; + return memunmap(addr); +} +EXPORT_SYMBOL(__wrap_memunmap); + +static struct resource *nfit_test_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name, int flags) +{ + struct nfit_test_resource *nfit_res; + + if (parent == &iomem_resource) { + rcu_read_lock(); + nfit_res = get_nfit_res(start); + rcu_read_unlock(); + if (nfit_res) { + struct resource *res = nfit_res->res + 1; + + if (start + n > nfit_res->res->start + + resource_size(nfit_res->res)) { + pr_debug("%s: start: %llx n: %llx overflow: %pr\n", + __func__, start, n, + nfit_res->res); + return NULL; + } + + res->start = start; + res->end = start + n - 1; + res->name = name; + res->flags = resource_type(parent); + res->flags |= IORESOURCE_BUSY | flags; + pr_debug("%s: %pr\n", __func__, res); + return res; + } + } + if (dev) + return __devm_request_region(dev, parent, start, n, name); + return __request_region(parent, start, n, name, flags); +} + +struct resource *__wrap___request_region(struct resource *parent, + resource_size_t start, resource_size_t n, const char *name, + int flags) +{ + return nfit_test_request_region(NULL, parent, start, n, name, flags); +} +EXPORT_SYMBOL(__wrap___request_region); + +struct resource *__wrap___devm_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name) +{ + if (!dev) + return NULL; + return nfit_test_request_region(dev, parent, start, n, name, 0); +} +EXPORT_SYMBOL(__wrap___devm_request_region); + +void __wrap___release_region(struct resource *parent, resource_size_t start, + resource_size_t n) +{ + struct nfit_test_resource *nfit_res; + + if (parent == &iomem_resource) { + rcu_read_lock(); + nfit_res = get_nfit_res(start); + rcu_read_unlock(); + if (nfit_res) { + struct resource *res = nfit_res->res + 1; + + if (start != res->start || resource_size(res) != n) + pr_info("%s: start: %llx n: %llx mismatch: %pr\n", + __func__, start, n, res); + else + memset(res, 0, sizeof(*res)); + return; + } + } + __release_region(parent, start, n); +} +EXPORT_SYMBOL(__wrap___release_region); + +MODULE_LICENSE("GPL v2"); diff --git a/kernel/tools/testing/nvdimm/test/nfit.c b/kernel/tools/testing/nvdimm/test/nfit.c new file mode 100644 index 000000000..51cf8256c --- /dev/null +++ b/kernel/tools/testing/nvdimm/test/nfit.c @@ -0,0 +1,1388 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/platform_device.h> +#include <linux/dma-mapping.h> +#include <linux/libnvdimm.h> +#include <linux/vmalloc.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/ndctl.h> +#include <linux/sizes.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <nfit.h> +#include <nd.h> +#include "nfit_test.h" + +/* + * Generate an NFIT table to describe the following topology: + * + * BUS0: Interleaved PMEM regions, and aliasing with BLK regions + * + * (a) (b) DIMM BLK-REGION + * +----------+--------------+----------+---------+ + * +------+ | blk2.0 | pm0.0 | blk2.1 | pm1.0 | 0 region2 + * | imc0 +--+- - - - - region0 - - - -+----------+ + + * +--+---+ | blk3.0 | pm0.0 | blk3.1 | pm1.0 | 1 region3 + * | +----------+--------------v----------v v + * +--+---+ | | + * | cpu0 | region1 + * +--+---+ | | + * | +-------------------------^----------^ ^ + * +--+---+ | blk4.0 | pm1.0 | 2 region4 + * | imc1 +--+-------------------------+----------+ + + * +------+ | blk5.0 | pm1.0 | 3 region5 + * +-------------------------+----------+-+-------+ + * + * +--+---+ + * | cpu1 | + * +--+---+ (Hotplug DIMM) + * | +----------------------------------------------+ + * +--+---+ | blk6.0/pm7.0 | 4 region6/7 + * | imc0 +--+----------------------------------------------+ + * +------+ + * + * + * *) In this layout we have four dimms and two memory controllers in one + * socket. Each unique interface (BLK or PMEM) to DPA space + * is identified by a region device with a dynamically assigned id. + * + * *) The first portion of dimm0 and dimm1 are interleaved as REGION0. + * A single PMEM namespace "pm0.0" is created using half of the + * REGION0 SPA-range. REGION0 spans dimm0 and dimm1. PMEM namespace + * allocate from from the bottom of a region. The unallocated + * portion of REGION0 aliases with REGION2 and REGION3. That + * unallacted capacity is reclaimed as BLK namespaces ("blk2.0" and + * "blk3.0") starting at the base of each DIMM to offset (a) in those + * DIMMs. "pm0.0", "blk2.0" and "blk3.0" are free-form readable + * names that can be assigned to a namespace. + * + * *) In the last portion of dimm0 and dimm1 we have an interleaved + * SPA range, REGION1, that spans those two dimms as well as dimm2 + * and dimm3. Some of REGION1 allocated to a PMEM namespace named + * "pm1.0" the rest is reclaimed in 4 BLK namespaces (for each + * dimm in the interleave set), "blk2.1", "blk3.1", "blk4.0", and + * "blk5.0". + * + * *) The portion of dimm2 and dimm3 that do not participate in the + * REGION1 interleaved SPA range (i.e. the DPA address below offset + * (b) are also included in the "blk4.0" and "blk5.0" namespaces. + * Note, that BLK namespaces need not be contiguous in DPA-space, and + * can consume aliased capacity from multiple interleave sets. + * + * BUS1: Legacy NVDIMM (single contiguous range) + * + * region2 + * +---------------------+ + * |---------------------| + * || pm2.0 || + * |---------------------| + * +---------------------+ + * + * *) A NFIT-table may describe a simple system-physical-address range + * with no BLK aliasing. This type of region may optionally + * reference an NVDIMM. + */ +enum { + NUM_PM = 3, + NUM_DCR = 5, + NUM_BDW = NUM_DCR, + NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, + NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, + DIMM_SIZE = SZ_32M, + LABEL_SIZE = SZ_128K, + SPA0_SIZE = DIMM_SIZE, + SPA1_SIZE = DIMM_SIZE*2, + SPA2_SIZE = DIMM_SIZE, + BDW_SIZE = 64 << 8, + DCR_SIZE = 12, + NUM_NFITS = 2, /* permit testing multiple NFITs per system */ +}; + +struct nfit_test_dcr { + __le64 bdw_addr; + __le32 bdw_status; + __u8 aperature[BDW_SIZE]; +}; + +#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \ + (((node & 0xfff) << 16) | ((socket & 0xf) << 12) \ + | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf)) + +static u32 handle[NUM_DCR] = { + [0] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0), + [1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1), + [2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0), + [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), + [4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0), +}; + +struct nfit_test { + struct acpi_nfit_desc acpi_desc; + struct platform_device pdev; + struct list_head resources; + void *nfit_buf; + dma_addr_t nfit_dma; + size_t nfit_size; + int num_dcr; + int num_pm; + void **dimm; + dma_addr_t *dimm_dma; + void **flush; + dma_addr_t *flush_dma; + void **label; + dma_addr_t *label_dma; + void **spa_set; + dma_addr_t *spa_set_dma; + struct nfit_test_dcr **dcr; + dma_addr_t *dcr_dma; + int (*alloc)(struct nfit_test *t); + void (*setup)(struct nfit_test *t); + int setup_hotplug; +}; + +static struct nfit_test *to_nfit_test(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + return container_of(pdev, struct nfit_test, pdev); +} + +static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->status = 0; + nd_cmd->config_size = LABEL_SIZE; + nd_cmd->max_xfer = SZ_4K; + + return 0; +} + +static int nfit_test_cmd_get_config_data(struct nd_cmd_get_config_data_hdr + *nd_cmd, unsigned int buf_len, void *label) +{ + unsigned int len, offset = nd_cmd->in_offset; + int rc; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + if (offset >= LABEL_SIZE) + return -EINVAL; + if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len) + return -EINVAL; + + nd_cmd->status = 0; + len = min(nd_cmd->in_length, LABEL_SIZE - offset); + memcpy(nd_cmd->out_buf, label + offset, len); + rc = buf_len - sizeof(*nd_cmd) - len; + + return rc; +} + +static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd, + unsigned int buf_len, void *label) +{ + unsigned int len, offset = nd_cmd->in_offset; + u32 *status; + int rc; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + if (offset >= LABEL_SIZE) + return -EINVAL; + if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len) + return -EINVAL; + + status = (void *)nd_cmd + nd_cmd->in_length + sizeof(*nd_cmd); + *status = 0; + len = min(nd_cmd->in_length, LABEL_SIZE - offset); + memcpy(label + offset, nd_cmd->in_buf, len); + rc = buf_len - sizeof(*nd_cmd) - (len + 4); + + return rc; +} + +static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->max_ars_out = 256; + nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16; + + return 0; +} + +static int nfit_test_cmd_ars_start(struct nd_cmd_ars_start *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->status = 0; + + return 0; +} + +static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd, + unsigned int buf_len) +{ + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + + nd_cmd->out_length = 256; + nd_cmd->num_records = 0; + nd_cmd->status = 0; + + return 0; +} + +static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); + int i, rc = 0; + + if (nvdimm) { + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + + if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) + return -ENOTTY; + + /* lookup label space for the given dimm */ + for (i = 0; i < ARRAY_SIZE(handle); i++) + if (__to_nfit_memdev(nfit_mem)->device_handle == + handle[i]) + break; + if (i >= ARRAY_SIZE(handle)) + return -ENXIO; + + switch (cmd) { + case ND_CMD_GET_CONFIG_SIZE: + rc = nfit_test_cmd_get_config_size(buf, buf_len); + break; + case ND_CMD_GET_CONFIG_DATA: + rc = nfit_test_cmd_get_config_data(buf, buf_len, + t->label[i]); + break; + case ND_CMD_SET_CONFIG_DATA: + rc = nfit_test_cmd_set_config_data(buf, buf_len, + t->label[i]); + break; + default: + return -ENOTTY; + } + } else { + if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask)) + return -ENOTTY; + + switch (cmd) { + case ND_CMD_ARS_CAP: + rc = nfit_test_cmd_ars_cap(buf, buf_len); + break; + case ND_CMD_ARS_START: + rc = nfit_test_cmd_ars_start(buf, buf_len); + break; + case ND_CMD_ARS_STATUS: + rc = nfit_test_cmd_ars_status(buf, buf_len); + break; + default: + return -ENOTTY; + } + } + + return rc; +} + +static DEFINE_SPINLOCK(nfit_test_lock); +static struct nfit_test *instances[NUM_NFITS]; + +static void release_nfit_res(void *data) +{ + struct nfit_test_resource *nfit_res = data; + struct resource *res = nfit_res->res; + + spin_lock(&nfit_test_lock); + list_del(&nfit_res->list); + spin_unlock(&nfit_test_lock); + + if (is_vmalloc_addr(nfit_res->buf)) + vfree(nfit_res->buf); + else + dma_free_coherent(nfit_res->dev, resource_size(res), + nfit_res->buf, res->start); + kfree(res); + kfree(nfit_res); +} + +static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, + void *buf) +{ + struct device *dev = &t->pdev.dev; + struct resource *res = kzalloc(sizeof(*res) * 2, GFP_KERNEL); + struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res), + GFP_KERNEL); + int rc; + + if (!res || !buf || !nfit_res) + goto err; + rc = devm_add_action(dev, release_nfit_res, nfit_res); + if (rc) + goto err; + INIT_LIST_HEAD(&nfit_res->list); + memset(buf, 0, size); + nfit_res->dev = dev; + nfit_res->buf = buf; + nfit_res->res = res; + res->start = *dma; + res->end = *dma + size - 1; + res->name = "NFIT"; + spin_lock(&nfit_test_lock); + list_add(&nfit_res->list, &t->resources); + spin_unlock(&nfit_test_lock); + + return nfit_res->buf; + err: + if (buf && !is_vmalloc_addr(buf)) + dma_free_coherent(dev, size, buf, *dma); + else if (buf) + vfree(buf); + kfree(res); + kfree(nfit_res); + return NULL; +} + +static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma) +{ + void *buf = vmalloc(size); + + *dma = (unsigned long) buf; + return __test_alloc(t, size, dma, buf); +} + +static void *test_alloc_coherent(struct nfit_test *t, size_t size, + dma_addr_t *dma) +{ + struct device *dev = &t->pdev.dev; + void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL); + + return __test_alloc(t, size, dma, buf); +} + +static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(instances); i++) { + struct nfit_test_resource *n, *nfit_res = NULL; + struct nfit_test *t = instances[i]; + + if (!t) + continue; + spin_lock(&nfit_test_lock); + list_for_each_entry(n, &t->resources, list) { + if (addr >= n->res->start && (addr < n->res->start + + resource_size(n->res))) { + nfit_res = n; + break; + } else if (addr >= (unsigned long) n->buf + && (addr < (unsigned long) n->buf + + resource_size(n->res))) { + nfit_res = n; + break; + } + } + spin_unlock(&nfit_test_lock); + if (nfit_res) + return nfit_res; + } + + return NULL; +} + +static int nfit_test0_alloc(struct nfit_test *t) +{ + size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA + + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + + sizeof(struct acpi_nfit_control_region) * NUM_DCR + + sizeof(struct acpi_nfit_data_region) * NUM_BDW + + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; + int i; + + t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); + if (!t->nfit_buf) + return -ENOMEM; + t->nfit_size = nfit_size; + + t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]); + if (!t->spa_set[0]) + return -ENOMEM; + + t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]); + if (!t->spa_set[1]) + return -ENOMEM; + + t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]); + if (!t->spa_set[2]) + return -ENOMEM; + + for (i = 0; i < NUM_DCR; i++) { + t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]); + if (!t->dimm[i]) + return -ENOMEM; + + t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]); + if (!t->label[i]) + return -ENOMEM; + sprintf(t->label[i], "label%d", i); + + t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]); + if (!t->flush[i]) + return -ENOMEM; + } + + for (i = 0; i < NUM_DCR; i++) { + t->dcr[i] = test_alloc(t, LABEL_SIZE, &t->dcr_dma[i]); + if (!t->dcr[i]) + return -ENOMEM; + } + + return 0; +} + +static int nfit_test1_alloc(struct nfit_test *t) +{ + size_t nfit_size = sizeof(struct acpi_nfit_system_address) + + sizeof(struct acpi_nfit_memory_map) + + sizeof(struct acpi_nfit_control_region); + + t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); + if (!t->nfit_buf) + return -ENOMEM; + t->nfit_size = nfit_size; + + t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]); + if (!t->spa_set[0]) + return -ENOMEM; + + return 0; +} + +static void nfit_test0_setup(struct nfit_test *t) +{ + struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; + struct acpi_nfit_memory_map *memdev; + void *nfit_buf = t->nfit_buf; + struct acpi_nfit_system_address *spa; + struct acpi_nfit_control_region *dcr; + struct acpi_nfit_data_region *bdw; + struct acpi_nfit_flush_address *flush; + unsigned int offset; + + /* + * spa0 (interleave first half of dimm0 and dimm1, note storage + * does not actually alias the related block-data-window + * regions) + */ + spa = nfit_buf; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 0+1; + spa->address = t->spa_set_dma[0]; + spa->length = SPA0_SIZE; + + /* + * spa1 (interleave last half of the 4 DIMMS, note storage + * does not actually alias the related block-data-window + * regions) + */ + spa = nfit_buf + sizeof(*spa); + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 1+1; + spa->address = t->spa_set_dma[1]; + spa->length = SPA1_SIZE; + + /* spa2 (dcr0) dimm0 */ + spa = nfit_buf + sizeof(*spa) * 2; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 2+1; + spa->address = t->dcr_dma[0]; + spa->length = DCR_SIZE; + + /* spa3 (dcr1) dimm1 */ + spa = nfit_buf + sizeof(*spa) * 3; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 3+1; + spa->address = t->dcr_dma[1]; + spa->length = DCR_SIZE; + + /* spa4 (dcr2) dimm2 */ + spa = nfit_buf + sizeof(*spa) * 4; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 4+1; + spa->address = t->dcr_dma[2]; + spa->length = DCR_SIZE; + + /* spa5 (dcr3) dimm3 */ + spa = nfit_buf + sizeof(*spa) * 5; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 5+1; + spa->address = t->dcr_dma[3]; + spa->length = DCR_SIZE; + + /* spa6 (bdw for dcr0) dimm0 */ + spa = nfit_buf + sizeof(*spa) * 6; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 6+1; + spa->address = t->dimm_dma[0]; + spa->length = DIMM_SIZE; + + /* spa7 (bdw for dcr1) dimm1 */ + spa = nfit_buf + sizeof(*spa) * 7; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 7+1; + spa->address = t->dimm_dma[1]; + spa->length = DIMM_SIZE; + + /* spa8 (bdw for dcr2) dimm2 */ + spa = nfit_buf + sizeof(*spa) * 8; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 8+1; + spa->address = t->dimm_dma[2]; + spa->length = DIMM_SIZE; + + /* spa9 (bdw for dcr3) dimm3 */ + spa = nfit_buf + sizeof(*spa) * 9; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 9+1; + spa->address = t->dimm_dma[3]; + spa->length = DIMM_SIZE; + + offset = sizeof(*spa) * 10; + /* mem-region0 (spa0, dimm0) */ + memdev = nfit_buf + offset; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 0+1; + memdev->region_index = 0+1; + memdev->region_size = SPA0_SIZE/2; + memdev->region_offset = t->spa_set_dma[0]; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 2; + + /* mem-region1 (spa0, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map); + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 0; + memdev->range_index = 0+1; + memdev->region_index = 1+1; + memdev->region_size = SPA0_SIZE/2; + memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 2; + + /* mem-region2 (spa1, dimm0) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 1; + memdev->range_index = 1+1; + memdev->region_index = 0+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1]; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region3 (spa1, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 1; + memdev->range_index = 1+1; + memdev->region_index = 1+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region4 (spa1, dimm2) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[2]; + memdev->physical_id = 2; + memdev->region_id = 0; + memdev->range_index = 1+1; + memdev->region_index = 2+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region5 (spa1, dimm3) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[3]; + memdev->physical_id = 3; + memdev->region_id = 0; + memdev->range_index = 1+1; + memdev->region_index = 3+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region6 (spa/dcr0, dimm0) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 2+1; + memdev->region_index = 0+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region7 (spa/dcr1, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 0; + memdev->range_index = 3+1; + memdev->region_index = 1+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region8 (spa/dcr2, dimm2) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[2]; + memdev->physical_id = 2; + memdev->region_id = 0; + memdev->range_index = 4+1; + memdev->region_index = 2+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region9 (spa/dcr3, dimm3) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[3]; + memdev->physical_id = 3; + memdev->region_id = 0; + memdev->range_index = 5+1; + memdev->region_index = 3+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region10 (spa/bdw0, dimm0) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 6+1; + memdev->region_index = 0+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region11 (spa/bdw1, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 0; + memdev->range_index = 7+1; + memdev->region_index = 1+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region12 (spa/bdw2, dimm2) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[2]; + memdev->physical_id = 2; + memdev->region_id = 0; + memdev->range_index = 8+1; + memdev->region_index = 2+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region13 (spa/dcr3, dimm3) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[3]; + memdev->physical_id = 3; + memdev->region_id = 0; + memdev->range_index = 9+1; + memdev->region_index = 3+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; + /* dcr-descriptor0 */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 0+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[0]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + /* dcr-descriptor1 */ + dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region); + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 1+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[1]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + /* dcr-descriptor2 */ + dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 2+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[2]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + /* dcr-descriptor3 */ + dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 3+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[3]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + offset = offset + sizeof(struct acpi_nfit_control_region) * 4; + /* bdw0 (spa/dcr0, dimm0) */ + bdw = nfit_buf + offset; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 0+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + /* bdw1 (spa/dcr1, dimm1) */ + bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region); + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 1+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + /* bdw2 (spa/dcr2, dimm2) */ + bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 2+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + /* bdw3 (spa/dcr3, dimm3) */ + bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 3+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + offset = offset + sizeof(struct acpi_nfit_data_region) * 4; + /* flush0 (dimm0) */ + flush = nfit_buf + offset; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[0]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[0]; + + /* flush1 (dimm1) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[1]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[1]; + + /* flush2 (dimm2) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[2]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[2]; + + /* flush3 (dimm3) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[3]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[3]; + + if (t->setup_hotplug) { + offset = offset + sizeof(struct acpi_nfit_flush_address) * 4; + /* dcr-descriptor4 */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 4+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[4]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + offset = offset + sizeof(struct acpi_nfit_control_region); + /* bdw4 (spa/dcr4, dimm4) */ + bdw = nfit_buf + offset; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 4+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + offset = offset + sizeof(struct acpi_nfit_data_region); + /* spa10 (dcr4) dimm4 */ + spa = nfit_buf + offset; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 10+1; + spa->address = t->dcr_dma[4]; + spa->length = DCR_SIZE; + + /* + * spa11 (single-dimm interleave for hotplug, note storage + * does not actually alias the related block-data-window + * regions) + */ + spa = nfit_buf + offset + sizeof(*spa); + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 11+1; + spa->address = t->spa_set_dma[2]; + spa->length = SPA0_SIZE; + + /* spa12 (bdw for dcr4) dimm4 */ + spa = nfit_buf + offset + sizeof(*spa) * 2; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 12+1; + spa->address = t->dimm_dma[4]; + spa->length = DIMM_SIZE; + + offset = offset + sizeof(*spa) * 3; + /* mem-region14 (spa/dcr4, dimm4) */ + memdev = nfit_buf + offset; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[4]; + memdev->physical_id = 4; + memdev->region_id = 0; + memdev->range_index = 10+1; + memdev->region_index = 4+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region15 (spa0, dimm4) */ + memdev = nfit_buf + offset + + sizeof(struct acpi_nfit_memory_map); + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[4]; + memdev->physical_id = 4; + memdev->region_id = 0; + memdev->range_index = 11+1; + memdev->region_index = 4+1; + memdev->region_size = SPA0_SIZE; + memdev->region_offset = t->spa_set_dma[2]; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region16 (spa/dcr4, dimm4) */ + memdev = nfit_buf + offset + + sizeof(struct acpi_nfit_memory_map) * 2; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[4]; + memdev->physical_id = 4; + memdev->region_id = 0; + memdev->range_index = 12+1; + memdev->region_index = 4+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + offset = offset + sizeof(struct acpi_nfit_memory_map) * 3; + /* flush3 (dimm4) */ + flush = nfit_buf + offset; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[4]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[4]; + } + + acpi_desc = &t->acpi_desc; + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); + nd_desc = &acpi_desc->nd_desc; + nd_desc->ndctl = nfit_test_ctl; +} + +static void nfit_test1_setup(struct nfit_test *t) +{ + size_t offset; + void *nfit_buf = t->nfit_buf; + struct acpi_nfit_memory_map *memdev; + struct acpi_nfit_control_region *dcr; + struct acpi_nfit_system_address *spa; + + offset = 0; + /* spa0 (flat range with no bdw aliasing) */ + spa = nfit_buf + offset; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 0+1; + spa->address = t->spa_set_dma[0]; + spa->length = SPA2_SIZE; + + offset += sizeof(*spa); + /* mem-region0 (spa0, dimm0) */ + memdev = nfit_buf + offset; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = 0; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 0+1; + memdev->region_index = 0+1; + memdev->region_size = SPA2_SIZE; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED + | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED + | ACPI_NFIT_MEM_NOT_ARMED; + + offset += sizeof(*memdev); + /* dcr-descriptor0 */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 0+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~0; + dcr->code = 0x201; + dcr->windows = 0; + dcr->window_size = 0; + dcr->command_offset = 0; + dcr->command_size = 0; + dcr->status_offset = 0; + dcr->status_size = 0; +} + +static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw) +{ + struct nfit_blk *nfit_blk = ndbr->blk_provider_data; + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW]; + struct nd_region *nd_region = &ndbr->nd_region; + unsigned int lane; + + lane = nd_region_acquire_lane(nd_region); + if (rw) + memcpy(mmio->addr.base + dpa, iobuf, len); + else { + memcpy(iobuf, mmio->addr.base + dpa, len); + + /* give us some some coverage of the mmio_flush_range() API */ + mmio_flush_range(mmio->addr.base + dpa, len); + } + nd_region_release_lane(nd_region, lane); + + return 0; +} + +static int nfit_test_probe(struct platform_device *pdev) +{ + struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; + struct device *dev = &pdev->dev; + struct nfit_test *nfit_test; + int rc; + + nfit_test = to_nfit_test(&pdev->dev); + + /* common alloc */ + if (nfit_test->num_dcr) { + int num = nfit_test->num_dcr; + + nfit_test->dimm = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), + GFP_KERNEL); + nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), + GFP_KERNEL); + nfit_test->label = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->label_dma = devm_kcalloc(dev, num, + sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->dcr = devm_kcalloc(dev, num, + sizeof(struct nfit_test_dcr *), GFP_KERNEL); + nfit_test->dcr_dma = devm_kcalloc(dev, num, + sizeof(dma_addr_t), GFP_KERNEL); + if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label + && nfit_test->label_dma && nfit_test->dcr + && nfit_test->dcr_dma && nfit_test->flush + && nfit_test->flush_dma) + /* pass */; + else + return -ENOMEM; + } + + if (nfit_test->num_pm) { + int num = nfit_test->num_pm; + + nfit_test->spa_set = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->spa_set_dma = devm_kcalloc(dev, num, + sizeof(dma_addr_t), GFP_KERNEL); + if (nfit_test->spa_set && nfit_test->spa_set_dma) + /* pass */; + else + return -ENOMEM; + } + + /* per-nfit specific alloc */ + if (nfit_test->alloc(nfit_test)) + return -ENOMEM; + + nfit_test->setup(nfit_test); + acpi_desc = &nfit_test->acpi_desc; + acpi_desc->dev = &pdev->dev; + acpi_desc->nfit = nfit_test->nfit_buf; + acpi_desc->blk_do_io = nfit_test_blk_do_io; + nd_desc = &acpi_desc->nd_desc; + nd_desc->attr_groups = acpi_nfit_attribute_groups; + acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc); + if (!acpi_desc->nvdimm_bus) + return -ENXIO; + + INIT_LIST_HEAD(&acpi_desc->spa_maps); + INIT_LIST_HEAD(&acpi_desc->spas); + INIT_LIST_HEAD(&acpi_desc->dcrs); + INIT_LIST_HEAD(&acpi_desc->bdws); + INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->flushes); + INIT_LIST_HEAD(&acpi_desc->memdevs); + INIT_LIST_HEAD(&acpi_desc->dimms); + mutex_init(&acpi_desc->spa_map_mutex); + mutex_init(&acpi_desc->init_mutex); + + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); + if (rc) { + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return rc; + } + + if (nfit_test->setup != nfit_test0_setup) + return 0; + + nfit_test->setup_hotplug = 1; + nfit_test->setup(nfit_test); + + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); + if (rc) { + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return rc; + } + + return 0; +} + +static int nfit_test_remove(struct platform_device *pdev) +{ + struct nfit_test *nfit_test = to_nfit_test(&pdev->dev); + struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc; + + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + + return 0; +} + +static void nfit_test_release(struct device *dev) +{ + struct nfit_test *nfit_test = to_nfit_test(dev); + + kfree(nfit_test); +} + +static const struct platform_device_id nfit_test_id[] = { + { KBUILD_MODNAME }, + { }, +}; + +static struct platform_driver nfit_test_driver = { + .probe = nfit_test_probe, + .remove = nfit_test_remove, + .driver = { + .name = KBUILD_MODNAME, + }, + .id_table = nfit_test_id, +}; + +#ifdef CONFIG_CMA_SIZE_MBYTES +#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES +#else +#define CMA_SIZE_MBYTES 0 +#endif + +static __init int nfit_test_init(void) +{ + int rc, i; + + nfit_test_setup(nfit_test_lookup); + + for (i = 0; i < NUM_NFITS; i++) { + struct nfit_test *nfit_test; + struct platform_device *pdev; + static int once; + + nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL); + if (!nfit_test) { + rc = -ENOMEM; + goto err_register; + } + INIT_LIST_HEAD(&nfit_test->resources); + switch (i) { + case 0: + nfit_test->num_pm = NUM_PM; + nfit_test->num_dcr = NUM_DCR; + nfit_test->alloc = nfit_test0_alloc; + nfit_test->setup = nfit_test0_setup; + break; + case 1: + nfit_test->num_pm = 1; + nfit_test->alloc = nfit_test1_alloc; + nfit_test->setup = nfit_test1_setup; + break; + default: + rc = -EINVAL; + goto err_register; + } + pdev = &nfit_test->pdev; + pdev->name = KBUILD_MODNAME; + pdev->id = i; + pdev->dev.release = nfit_test_release; + rc = platform_device_register(pdev); + if (rc) { + put_device(&pdev->dev); + goto err_register; + } + + rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) + goto err_register; + + instances[i] = nfit_test; + + if (!once++) { + dma_addr_t dma; + void *buf; + + buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma, + GFP_KERNEL); + if (!buf) { + rc = -ENOMEM; + dev_warn(&pdev->dev, "need 128M of free cma\n"); + goto err_register; + } + dma_free_coherent(&pdev->dev, SZ_128M, buf, dma); + } + } + + rc = platform_driver_register(&nfit_test_driver); + if (rc) + goto err_register; + return 0; + + err_register: + for (i = 0; i < NUM_NFITS; i++) + if (instances[i]) + platform_device_unregister(&instances[i]->pdev); + nfit_test_teardown(); + return rc; +} + +static __exit void nfit_test_exit(void) +{ + int i; + + platform_driver_unregister(&nfit_test_driver); + for (i = 0; i < NUM_NFITS; i++) + platform_device_unregister(&instances[i]->pdev); + nfit_test_teardown(); +} + +module_init(nfit_test_init); +module_exit(nfit_test_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/kernel/tools/testing/nvdimm/test/nfit_test.h b/kernel/tools/testing/nvdimm/test/nfit_test.h new file mode 100644 index 000000000..96c5e16d7 --- /dev/null +++ b/kernel/tools/testing/nvdimm/test/nfit_test.h @@ -0,0 +1,29 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __NFIT_TEST_H__ +#define __NFIT_TEST_H__ + +struct nfit_test_resource { + struct list_head list; + struct resource *res; + struct device *dev; + void *buf; +}; + +typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); +void __iomem *__wrap_ioremap_nocache(resource_size_t offset, + unsigned long size); +void __wrap_iounmap(volatile void __iomem *addr); +void nfit_test_setup(nfit_test_lookup_fn lookup); +void nfit_test_teardown(void); +#endif diff --git a/kernel/tools/testing/selftests/Makefile b/kernel/tools/testing/selftests/Makefile index f76830643..c8edff680 100644 --- a/kernel/tools/testing/selftests/Makefile +++ b/kernel/tools/testing/selftests/Makefile @@ -4,21 +4,32 @@ TARGETS += efivarfs TARGETS += exec TARGETS += firmware TARGETS += ftrace +TARGETS += futex TARGETS += kcmp +TARGETS += lib +TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net TARGETS += powerpc +TARGETS += pstore TARGETS += ptrace +TARGETS += seccomp TARGETS += size +TARGETS += static_keys TARGETS += sysctl +ifneq (1, $(quicktest)) TARGETS += timers +endif TARGETS += user TARGETS += vm TARGETS += x86 +TARGETS += zram #Please keep the TARGETS list alphabetically sorted +# Run "make quicktest=1 run_tests" or +# "make quicktest=1 kselftest from top level Makefile TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug @@ -56,6 +67,9 @@ clean_hotplug: make -C $$TARGET clean; \ done; +run_pstore_crash: + make -C pstore run_crash + INSTALL_PATH ?= install INSTALL_PATH := $(abspath $(INSTALL_PATH)) ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh @@ -65,7 +79,6 @@ ifdef INSTALL_PATH @# Ask all targets to install their files mkdir -p $(INSTALL_PATH) for TARGET in $(TARGETS); do \ - mkdir -p $(INSTALL_PATH)/$$TARGET ; \ make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ done; diff --git a/kernel/tools/testing/selftests/breakpoints/Makefile b/kernel/tools/testing/selftests/breakpoints/Makefile index 182235640..c0d957015 100644 --- a/kernel/tools/testing/selftests/breakpoints/Makefile +++ b/kernel/tools/testing/selftests/breakpoints/Makefile @@ -1,22 +1,12 @@ # Taken from perf makefile uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/) -ifeq ($(ARCH),i386) - ARCH := x86 -endif -ifeq ($(ARCH),x86_64) - ARCH := x86 -endif - +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -all: ifeq ($(ARCH),x86) - gcc breakpoint_test.c -o breakpoint_test -else - echo "Not an x86 target, can't build breakpoints selftests" +TEST_PROGS := breakpoint_test endif -TEST_PROGS := breakpoint_test +all: $(TEST_PROGS) include ../lib.mk diff --git a/kernel/tools/testing/selftests/capabilities/.gitignore b/kernel/tools/testing/selftests/capabilities/.gitignore new file mode 100644 index 000000000..b732dd0d4 --- /dev/null +++ b/kernel/tools/testing/selftests/capabilities/.gitignore @@ -0,0 +1,2 @@ +test_execve +validate_cap diff --git a/kernel/tools/testing/selftests/capabilities/Makefile b/kernel/tools/testing/selftests/capabilities/Makefile new file mode 100644 index 000000000..8c8f0c1f0 --- /dev/null +++ b/kernel/tools/testing/selftests/capabilities/Makefile @@ -0,0 +1,18 @@ +all: + +include ../lib.mk + +.PHONY: all clean + +TARGETS := validate_cap test_execve +TEST_PROGS := test_execve + +CFLAGS := -O2 -g -std=gnu99 -Wall -lcap-ng + +all: $(TARGETS) + +clean: + $(RM) $(TARGETS) + +$(TARGETS): %: %.c + $(CC) -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl diff --git a/kernel/tools/testing/selftests/capabilities/test_execve.c b/kernel/tools/testing/selftests/capabilities/test_execve.c new file mode 100644 index 000000000..10a21a958 --- /dev/null +++ b/kernel/tools/testing/selftests/capabilities/test_execve.c @@ -0,0 +1,427 @@ +#define _GNU_SOURCE + +#include <cap-ng.h> +#include <err.h> +#include <linux/capability.h> +#include <stdbool.h> +#include <string.h> +#include <stdio.h> +#include <fcntl.h> +#include <errno.h> +#include <stdarg.h> +#include <sched.h> +#include <sys/mount.h> +#include <limits.h> +#include <libgen.h> +#include <malloc.h> +#include <sys/wait.h> +#include <sys/prctl.h> +#include <sys/stat.h> + +#ifndef PR_CAP_AMBIENT +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 +#endif + +static int nerrs; + +static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) +{ + char buf[4096]; + int fd; + ssize_t written; + int buf_len; + + buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); + if (buf_len < 0) { + err(1, "vsnprintf failed"); + } + if (buf_len >= sizeof(buf)) { + errx(1, "vsnprintf output truncated"); + } + + fd = open(filename, O_WRONLY); + if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return; + err(1, "open of %s failed", filename); + } + written = write(fd, buf, buf_len); + if (written != buf_len) { + if (written >= 0) { + errx(1, "short write to %s", filename); + } else { + err(1, "write to %s failed", filename); + } + } + if (close(fd) != 0) { + err(1, "close of %s failed", filename); + } +} + +static void maybe_write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(true, filename, fmt, ap); + va_end(ap); +} + +static void write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); +} + +static bool create_and_enter_ns(uid_t inner_uid) +{ + uid_t outer_uid; + gid_t outer_gid; + int i; + bool have_outer_privilege; + + outer_uid = getuid(); + outer_gid = getgid(); + + /* + * TODO: If we're already root, we could skip creating the userns. + */ + + if (unshare(CLONE_NEWNS) == 0) { + printf("[NOTE]\tUsing global UIDs for tests\n"); + if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0) + err(1, "PR_SET_KEEPCAPS"); + if (setresuid(inner_uid, inner_uid, -1) != 0) + err(1, "setresuid"); + + // Re-enable effective caps + capng_get_caps_process(); + for (i = 0; i < CAP_LAST_CAP; i++) + if (capng_have_capability(CAPNG_PERMITTED, i)) + capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + + have_outer_privilege = true; + } else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) { + printf("[NOTE]\tUsing a user namespace for tests\n"); + maybe_write_file("/proc/self/setgroups", "deny"); + write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid); + write_file("/proc/self/gid_map", "0 %d 1", outer_gid); + + have_outer_privilege = false; + } else { + errx(1, "must be root or be able to create a userns"); + } + + if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0) + err(1, "remount everything private"); + + return have_outer_privilege; +} + +static void chdir_to_tmpfs(void) +{ + char cwd[PATH_MAX]; + if (getcwd(cwd, sizeof(cwd)) != cwd) + err(1, "getcwd"); + + if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0) + err(1, "mount private tmpfs"); + + if (chdir(cwd) != 0) + err(1, "chdir to private tmpfs"); + + if (umount2(".", MNT_DETACH) != 0) + err(1, "detach private tmpfs"); +} + +static void copy_fromat_to(int fromfd, const char *fromname, const char *toname) +{ + int from = openat(fromfd, fromname, O_RDONLY); + if (from == -1) + err(1, "open copy source"); + + int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700); + + while (true) { + char buf[4096]; + ssize_t sz = read(from, buf, sizeof(buf)); + if (sz == 0) + break; + if (sz < 0) + err(1, "read"); + + if (write(to, buf, sz) != sz) + err(1, "write"); /* no short writes on tmpfs */ + } + + close(from); + close(to); +} + +static bool fork_wait(void) +{ + pid_t child = fork(); + if (child == 0) { + nerrs = 0; + return true; + } else if (child > 0) { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + + return false; + } else { + err(1, "fork"); + } +} + +static void exec_other_validate_cap(const char *name, + bool eff, bool perm, bool inh, bool ambient) +{ + execl(name, name, (eff ? "1" : "0"), + (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"), + NULL); + err(1, "execl"); +} + +static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient) +{ + exec_other_validate_cap("./validate_cap", eff, perm, inh, ambient); +} + +static int do_tests(int uid, const char *our_path) +{ + bool have_outer_privilege = create_and_enter_ns(uid); + + int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY); + if (ourpath_fd == -1) + err(1, "open '%s'", our_path); + + chdir_to_tmpfs(); + + copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap"); + + if (have_outer_privilege) { + uid_t gid = getegid(); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_suidroot"); + if (chown("validate_cap_suidroot", 0, -1) != 0) + err(1, "chown"); + if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0) + err(1, "chmod"); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_suidnonroot"); + if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0) + err(1, "chown"); + if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0) + err(1, "chmod"); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_sgidroot"); + if (chown("validate_cap_sgidroot", -1, 0) != 0) + err(1, "chown"); + if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0) + err(1, "chmod"); + + copy_fromat_to(ourpath_fd, "validate_cap", + "validate_cap_sgidnonroot"); + if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0) + err(1, "chown"); + if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0) + err(1, "chmod"); +} + + capng_get_caps_process(); + + /* Make sure that i starts out clear */ + capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + + if (uid == 0) { + printf("[RUN]\tRoot => ep\n"); + if (fork_wait()) + exec_validate_cap(true, true, false, false); + } else { + printf("[RUN]\tNon-root => no caps\n"); + if (fork_wait()) + exec_validate_cap(false, false, false, false); + } + + printf("[OK]\tCheck cap_ambient manipulation rules\n"); + + /* We should not be able to add ambient caps yet. */ + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) { + if (errno == EINVAL) + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE isn't supported\n"); + else + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n"); + return 1; + } + printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n"); + + capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW); + capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW); + capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) { + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n"); + return 1; + } + printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-permitted cap\n"); + + capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { + printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have succeeded\n"); + return 1; + } + printf("[OK]\tPR_CAP_AMBIENT_RAISE worked\n"); + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) { + printf("[FAIL]\tPR_CAP_AMBIENT_IS_SET is broken\n"); + return 1; + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0) + err(1, "PR_CAP_AMBIENT_CLEAR_ALL"); + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { + printf("[FAIL]\tPR_CAP_AMBIENT_CLEAR_ALL didn't work\n"); + return 1; + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) + err(1, "PR_CAP_AMBIENT_RAISE"); + + capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { + printf("[FAIL]\tDropping I should have dropped A\n"); + return 1; + } + + printf("[OK]\tBasic manipulation appears to work\n"); + + capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(1, "capng_apply"); + if (uid == 0) { + printf("[RUN]\tRoot +i => eip\n"); + if (fork_wait()) + exec_validate_cap(true, true, true, false); + } else { + printf("[RUN]\tNon-root +i => i\n"); + if (fork_wait()) + exec_validate_cap(false, false, true, false); + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) + err(1, "PR_CAP_AMBIENT_RAISE"); + + printf("[RUN]\tUID %d +ia => eipa\n", uid); + if (fork_wait()) + exec_validate_cap(true, true, true, true); + + /* The remaining tests need real privilege */ + + if (!have_outer_privilege) { + printf("[SKIP]\tSUID/SGID tests (needs privilege)\n"); + goto done; + } + + if (uid == 0) { + printf("[RUN]\tRoot +ia, suidroot => eipa\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_suidroot", + true, true, true, true); + + printf("[RUN]\tRoot +ia, suidnonroot => ip\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_suidnonroot", + false, true, true, false); + + printf("[RUN]\tRoot +ia, sgidroot => eipa\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_sgidroot", + true, true, true, true); + + if (fork_wait()) { + printf("[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n"); + if (setresgid(1, 1, 1) != 0) + err(1, "setresgid"); + exec_other_validate_cap("./validate_cap_sgidroot", + true, true, true, false); + } + + printf("[RUN]\tRoot +ia, sgidnonroot => eip\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_sgidnonroot", + true, true, true, false); + } else { + printf("[RUN]\tNon-root +ia, sgidnonroot => i\n"); + exec_other_validate_cap("./validate_cap_sgidnonroot", + false, false, true, false); + + if (fork_wait()) { + printf("[RUN]\tNon-root +ia, sgidroot => i\n"); + if (setresgid(1, 1, 1) != 0) + err(1, "setresgid"); + exec_other_validate_cap("./validate_cap_sgidroot", + false, false, true, false); + } + } + +done: + return nerrs ? 1 : 0; +} + +int main(int argc, char **argv) +{ + char *tmp1, *tmp2, *our_path; + + /* Find our path */ + tmp1 = strdup(argv[0]); + if (!tmp1) + err(1, "strdup"); + tmp2 = dirname(tmp1); + our_path = strdup(tmp2); + if (!our_path) + err(1, "strdup"); + free(tmp1); + + if (fork_wait()) { + printf("[RUN]\t+++ Tests with uid == 0 +++\n"); + return do_tests(0, our_path); + } + + if (fork_wait()) { + printf("[RUN]\t+++ Tests with uid != 0 +++\n"); + return do_tests(1, our_path); + } + + return nerrs ? 1 : 0; +} diff --git a/kernel/tools/testing/selftests/capabilities/validate_cap.c b/kernel/tools/testing/selftests/capabilities/validate_cap.c new file mode 100644 index 000000000..dd3c45f7b --- /dev/null +++ b/kernel/tools/testing/selftests/capabilities/validate_cap.c @@ -0,0 +1,73 @@ +#include <cap-ng.h> +#include <err.h> +#include <linux/capability.h> +#include <stdbool.h> +#include <string.h> +#include <stdio.h> +#include <sys/prctl.h> +#include <sys/auxv.h> + +#ifndef PR_CAP_AMBIENT +#define PR_CAP_AMBIENT 47 +# define PR_CAP_AMBIENT_IS_SET 1 +# define PR_CAP_AMBIENT_RAISE 2 +# define PR_CAP_AMBIENT_LOWER 3 +# define PR_CAP_AMBIENT_CLEAR_ALL 4 +#endif + +#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19) +# define HAVE_GETAUXVAL +#endif + +static bool bool_arg(char **argv, int i) +{ + if (!strcmp(argv[i], "0")) + return false; + else if (!strcmp(argv[i], "1")) + return true; + else + errx(1, "wrong argv[%d]", i); +} + +int main(int argc, char **argv) +{ + const char *atsec = ""; + + /* + * Be careful just in case a setgid or setcapped copy of this + * helper gets out. + */ + + if (argc != 5) + errx(1, "wrong argc"); + +#ifdef HAVE_GETAUXVAL + if (getauxval(AT_SECURE)) + atsec = " (AT_SECURE is set)"; + else + atsec = " (AT_SECURE is not set)"; +#endif + + capng_get_caps_process(); + + if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) { + printf("[FAIL]\tWrong effective state%s\n", atsec); + return 1; + } + if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) { + printf("[FAIL]\tWrong permitted state%s\n", atsec); + return 1; + } + if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) { + printf("[FAIL]\tWrong inheritable state%s\n", atsec); + return 1; + } + + if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) { + printf("[FAIL]\tWrong ambient state%s\n", atsec); + return 1; + } + + printf("[OK]\tCapabilities after execve were correct\n"); + return 0; +} diff --git a/kernel/tools/testing/selftests/efivarfs/.gitignore b/kernel/tools/testing/selftests/efivarfs/.gitignore new file mode 100644 index 000000000..336184935 --- /dev/null +++ b/kernel/tools/testing/selftests/efivarfs/.gitignore @@ -0,0 +1,2 @@ +create-read +open-unlink diff --git a/kernel/tools/testing/selftests/efivarfs/efivarfs.sh b/kernel/tools/testing/selftests/efivarfs/efivarfs.sh index 77edcdcc0..057278448 100755 --- a/kernel/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/kernel/tools/testing/selftests/efivarfs/efivarfs.sh @@ -88,7 +88,11 @@ test_delete() exit 1 fi - rm $file + rm $file 2>/dev/null + if [ $? -ne 0 ]; then + chattr -i $file + rm $file + fi if [ -e $file ]; then echo "$file couldn't be deleted" >&2 @@ -111,6 +115,7 @@ test_zero_size_delete() exit 1 fi + chattr -i $file printf "$attrs" > $file if [ -e $file ]; then @@ -141,7 +146,11 @@ test_valid_filenames() echo "$file could not be created" >&2 ret=1 else - rm $file + rm $file 2>/dev/null + if [ $? -ne 0 ]; then + chattr -i $file + rm $file + fi fi done @@ -174,7 +183,11 @@ test_invalid_filenames() if [ -e $file ]; then echo "Creating $file should have failed" >&2 - rm $file + rm $file 2>/dev/null + if [ $? -ne 0 ]; then + chattr -i $file + rm $file + fi ret=1 fi done diff --git a/kernel/tools/testing/selftests/efivarfs/open-unlink.c b/kernel/tools/testing/selftests/efivarfs/open-unlink.c index 8c0764407..4af74f733 100644 --- a/kernel/tools/testing/selftests/efivarfs/open-unlink.c +++ b/kernel/tools/testing/selftests/efivarfs/open-unlink.c @@ -1,10 +1,68 @@ +#include <errno.h> #include <stdio.h> #include <stdint.h> #include <stdlib.h> #include <unistd.h> +#include <sys/ioctl.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <linux/fs.h> + +static int set_immutable(const char *path, int immutable) +{ + unsigned int flags; + int fd; + int rc; + int error; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + rc = ioctl(fd, FS_IOC_GETFLAGS, &flags); + if (rc < 0) { + error = errno; + close(fd); + errno = error; + return rc; + } + + if (immutable) + flags |= FS_IMMUTABLE_FL; + else + flags &= ~FS_IMMUTABLE_FL; + + rc = ioctl(fd, FS_IOC_SETFLAGS, &flags); + error = errno; + close(fd); + errno = error; + return rc; +} + +static int get_immutable(const char *path) +{ + unsigned int flags; + int fd; + int rc; + int error; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + rc = ioctl(fd, FS_IOC_GETFLAGS, &flags); + if (rc < 0) { + error = errno; + close(fd); + errno = error; + return rc; + } + close(fd); + if (flags & FS_IMMUTABLE_FL) + return 1; + return 0; +} int main(int argc, char **argv) { @@ -27,7 +85,7 @@ int main(int argc, char **argv) buf[4] = 0; /* create a test variable */ - fd = open(path, O_WRONLY | O_CREAT); + fd = open(path, O_WRONLY | O_CREAT, 0600); if (fd < 0) { perror("open(O_WRONLY)"); return EXIT_FAILURE; @@ -41,6 +99,18 @@ int main(int argc, char **argv) close(fd); + rc = get_immutable(path); + if (rc < 0) { + perror("ioctl(FS_IOC_GETFLAGS)"); + return EXIT_FAILURE; + } else if (rc) { + rc = set_immutable(path, 0); + if (rc < 0) { + perror("ioctl(FS_IOC_SETFLAGS)"); + return EXIT_FAILURE; + } + } + fd = open(path, O_RDONLY); if (fd < 0) { perror("open"); diff --git a/kernel/tools/testing/selftests/exec/Makefile b/kernel/tools/testing/selftests/exec/Makefile index 4edb7d0da..4e400eb83 100644 --- a/kernel/tools/testing/selftests/exec/Makefile +++ b/kernel/tools/testing/selftests/exec/Makefile @@ -22,7 +22,5 @@ TEST_FILES := $(DEPS) include ../lib.mk -override EMIT_TESTS := echo "mkdir -p subdir; (./execveat && echo \"selftests: execveat [PASS]\") || echo \"selftests: execveat [FAIL]\"" - clean: rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx* diff --git a/kernel/tools/testing/selftests/firmware/fw_filesystem.sh b/kernel/tools/testing/selftests/firmware/fw_filesystem.sh index 3fc6c10c2..c4366dc74 100755 --- a/kernel/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/kernel/tools/testing/selftests/firmware/fw_filesystem.sh @@ -9,7 +9,15 @@ modprobe test_firmware DIR=/sys/devices/virtual/misc/test_firmware -OLD_TIMEOUT=$(cat /sys/class/firmware/timeout) +# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/ +# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that +# as an indicator for CONFIG_FW_LOADER_USER_HELPER. +HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi) + +if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + OLD_TIMEOUT=$(cat /sys/class/firmware/timeout) +fi + OLD_FWPATH=$(cat /sys/module/firmware_class/parameters/path) FWPATH=$(mktemp -d) @@ -17,7 +25,9 @@ FW="$FWPATH/test-firmware.bin" test_finish() { - echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout + if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout + fi echo -n "$OLD_PATH" >/sys/module/firmware_class/parameters/path rm -f "$FW" rmdir "$FWPATH" @@ -25,8 +35,11 @@ test_finish() trap "test_finish" EXIT -# Turn down the timeout so failures don't take so long. -echo 1 >/sys/class/firmware/timeout +if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + # Turn down the timeout so failures don't take so long. + echo 1 >/sys/class/firmware/timeout +fi + # Set the kernel search path. echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path @@ -41,7 +54,9 @@ if diff -q "$FW" /dev/test_firmware >/dev/null ; then echo "$0: firmware was not expected to match" >&2 exit 1 else - echo "$0: timeout works" + if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + echo "$0: timeout works" + fi fi # This should succeed via kernel load or will fail after 1 second after diff --git a/kernel/tools/testing/selftests/firmware/fw_userhelper.sh b/kernel/tools/testing/selftests/firmware/fw_userhelper.sh index 6efbade12..b9983f8e0 100755 --- a/kernel/tools/testing/selftests/firmware/fw_userhelper.sh +++ b/kernel/tools/testing/selftests/firmware/fw_userhelper.sh @@ -9,7 +9,17 @@ modprobe test_firmware DIR=/sys/devices/virtual/misc/test_firmware -OLD_TIMEOUT=$(cat /sys/class/firmware/timeout) +# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/ +# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that +# as an indicator for CONFIG_FW_LOADER_USER_HELPER. +HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi) + +if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then + OLD_TIMEOUT=$(cat /sys/class/firmware/timeout) +else + echo "usermode helper disabled so ignoring test" + exit 0 +fi FWPATH=$(mktemp -d) FW="$FWPATH/test-firmware.bin" diff --git a/kernel/tools/testing/selftests/ftrace/Makefile b/kernel/tools/testing/selftests/ftrace/Makefile index 346720639..4e6ed13e7 100644 --- a/kernel/tools/testing/selftests/ftrace/Makefile +++ b/kernel/tools/testing/selftests/ftrace/Makefile @@ -1,6 +1,7 @@ all: TEST_PROGS := ftracetest +TEST_DIRS := test.d include ../lib.mk diff --git a/kernel/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/kernel/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index a5a426211..c3843ed49 100644 --- a/kernel/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/kernel/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -5,7 +5,7 @@ echo 0 > events/enable echo > kprobe_events -echo p:myevent do_fork > kprobe_events +echo p:myevent _do_fork > kprobe_events grep myevent kprobe_events test -d events/kprobes/myevent echo > kprobe_events diff --git a/kernel/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/kernel/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index d8c7bb658..74507db8b 100644 --- a/kernel/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/kernel/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -5,7 +5,7 @@ echo 0 > events/enable echo > kprobe_events -echo p:myevent do_fork > kprobe_events +echo p:myevent _do_fork > kprobe_events test -d events/kprobes/myevent echo 1 > events/kprobes/myevent/enable echo > kprobe_events && exit 1 # this must fail diff --git a/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index c45ee2761..64949d4ed 100644 --- a/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -5,7 +5,7 @@ echo 0 > events/enable echo > kprobe_events -echo 'p:testprobe do_fork $stack $stack0 +0($stack)' > kprobe_events +echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events grep testprobe kprobe_events test -d events/kprobes/testprobe echo 1 > events/kprobes/testprobe/enable diff --git a/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index ab41d2b29..d6f2f4965 100644 --- a/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -6,31 +6,31 @@ grep function available_tracers || exit_unsupported # this is configurable # prepare echo nop > current_tracer -echo do_fork > set_ftrace_filter +echo _do_fork > set_ftrace_filter echo 0 > events/enable echo > kprobe_events -echo 'p:testprobe do_fork' > kprobe_events +echo 'p:testprobe _do_fork' > kprobe_events # kprobe on / ftrace off echo 1 > events/kprobes/testprobe/enable echo > trace ( echo "forked") grep testprobe trace -! grep 'do_fork <-' trace +! grep '_do_fork <-' trace # kprobe on / ftrace on echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'do_fork <-' trace +grep '_do_fork <-' trace # kprobe off / ftrace on echo 0 > events/kprobes/testprobe/enable echo > trace ( echo "forked") ! grep testprobe trace -grep 'do_fork <-' trace +grep '_do_fork <-' trace # kprobe on / ftrace on echo 1 > events/kprobes/testprobe/enable @@ -38,14 +38,14 @@ echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'do_fork <-' trace +grep '_do_fork <-' trace # kprobe on / ftrace off echo nop > current_tracer echo > trace ( echo "forked") grep testprobe trace -! grep 'do_fork <-' trace +! grep '_do_fork <-' trace # cleanup echo nop > current_tracer diff --git a/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index 31717985a..0d0954625 100644 --- a/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/kernel/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -5,7 +5,7 @@ echo 0 > events/enable echo > kprobe_events -echo 'r:testprobe2 do_fork $retval' > kprobe_events +echo 'r:testprobe2 _do_fork $retval' > kprobe_events grep testprobe2 kprobe_events test -d events/kprobes/testprobe2 echo 1 > events/kprobes/testprobe2/enable diff --git a/kernel/tools/testing/selftests/futex/Makefile b/kernel/tools/testing/selftests/futex/Makefile new file mode 100644 index 000000000..6a1752956 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/Makefile @@ -0,0 +1,29 @@ +SUBDIRS := functional + +TEST_PROGS := run.sh + +.PHONY: all clean +all: + for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done + +include ../lib.mk + +override define RUN_TESTS + ./run.sh +endef + +override define INSTALL_RULE + mkdir -p $(INSTALL_PATH) + install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) + + @for SUBDIR in $(SUBDIRS); do \ + $(MAKE) -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \ + done; +endef + +override define EMIT_TESTS + echo "./run.sh" +endef + +clean: + for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done diff --git a/kernel/tools/testing/selftests/futex/README b/kernel/tools/testing/selftests/futex/README new file mode 100644 index 000000000..0558bb9ce --- /dev/null +++ b/kernel/tools/testing/selftests/futex/README @@ -0,0 +1,62 @@ +Futex Test +========== +Futex Test is intended to thoroughly test the Linux kernel futex system call +API. + +Functional tests shall test the documented behavior of the futex operation +code under test. This includes checking for proper behavior under normal use, +odd corner cases, regression tests, and abject abuse and misuse. + +Futextest will also provide example implementation of mutual exclusion +primitives. These can be used as is in user applications or can serve as +examples for system libraries. These will likely be added to either a new lib/ +directory or purely as header files under include/, I'm leaning toward the +latter. + +Quick Start +----------- +# make +# ./run.sh + +Design and Implementation Goals +------------------------------- +o Tests should be as self contained as is practical so as to facilitate sharing + the individual tests on mailing list discussions and bug reports. +o The build system shall remain as simple as possible, avoiding any archive or + shared object building and linking. +o Where possible, any helper functions or other package-wide code shall be + implemented in header files, avoiding the need to compile intermediate object + files. +o External dependencies shall remain as minimal as possible. Currently gcc + and glibc are the only dependencies. +o Tests return 0 for success and < 0 for failure. + +Output Formatting +----------------- +Test output shall be easily parsable by both human and machine. Title and +results are printed to stdout, while intermediate ERROR or FAIL messages are +sent to stderr. Tests shall support the -c option to print PASS, FAIL, and +ERROR strings in color for easy visual parsing. Output shall conform to the +following format: + +test_name: Description of the test + Arguments: arg1=val1 #units specified for clarity where appropriate + ERROR: Description of unexpected error + FAIL: Reason for test failure + # FIXME: Perhaps an " INFO: informational message" option would be + # useful here. Using -v to toggle it them on and off, as with -c. + # there may be multiple ERROR or FAIL messages +Result: (PASS|FAIL|ERROR) + +Naming +------ +o FIXME: decide on a sane test naming scheme. Currently the tests are named + based on the primary futex operation they test. Eventually this will become a + problem as we intend to write multiple tests which collide in this namespace. + Perhaps something like "wait-wake-1" "wait-wake-2" is adequate, leaving the + detailed description in the test source and the output. + +Coding Style +------------ +o The Futex Test project adheres to the coding standards set forth by Linux + kernel as defined in the Linux source Documentation/CodingStyle. diff --git a/kernel/tools/testing/selftests/futex/functional/.gitignore b/kernel/tools/testing/selftests/futex/functional/.gitignore new file mode 100644 index 000000000..a09f57061 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/functional/.gitignore @@ -0,0 +1,7 @@ +futex_requeue_pi +futex_requeue_pi_mismatched_ops +futex_requeue_pi_signal_restart +futex_wait_private_mapped_file +futex_wait_timeout +futex_wait_uninitialized_heap +futex_wait_wouldblock diff --git a/kernel/tools/testing/selftests/futex/functional/Makefile b/kernel/tools/testing/selftests/futex/functional/Makefile new file mode 100644 index 000000000..9d6b75ef7 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/functional/Makefile @@ -0,0 +1,25 @@ +INCLUDES := -I../include -I../../ +CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) +LDFLAGS := $(LDFLAGS) -pthread -lrt + +HEADERS := ../include/futextest.h +TARGETS := \ + futex_wait_timeout \ + futex_wait_wouldblock \ + futex_requeue_pi \ + futex_requeue_pi_signal_restart \ + futex_requeue_pi_mismatched_ops \ + futex_wait_uninitialized_heap \ + futex_wait_private_mapped_file + +TEST_PROGS := $(TARGETS) run.sh + +.PHONY: all clean +all: $(TARGETS) + +$(TARGETS): $(HEADERS) + +include ../../lib.mk + +clean: + rm -f $(TARGETS) diff --git a/kernel/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/kernel/tools/testing/selftests/futex/functional/futex_requeue_pi.c new file mode 100644 index 000000000..3da06ad23 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/functional/futex_requeue_pi.c @@ -0,0 +1,409 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2006-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * This test excercises the futex syscall op codes needed for requeuing + * priority inheritance aware POSIX condition variables and mutexes. + * + * AUTHORS + * Sripathi Kodi <sripathik@in.ibm.com> + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2008-Jan-13: Initial version by Sripathi Kodi <sripathik@in.ibm.com> + * 2009-Nov-6: futex test adaptation by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#include <errno.h> +#include <limits.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <string.h> +#include "atomic.h" +#include "futextest.h" +#include "logging.h" + +#define MAX_WAKE_ITERS 1000 +#define THREAD_MAX 10 +#define SIGNAL_PERIOD_US 100 + +atomic_t waiters_blocked = ATOMIC_INITIALIZER; +atomic_t waiters_woken = ATOMIC_INITIALIZER; + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +futex_t wake_complete = FUTEX_INITIALIZER; + +/* Test option defaults */ +static long timeout_ns; +static int broadcast; +static int owner; +static int locked; + +struct thread_arg { + long id; + struct timespec *timeout; + int lock; + int ret; +}; +#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 } + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -b Broadcast wakeup (all waiters)\n"); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -l Lock the pi futex across requeue\n"); + printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n"); + printf(" -t N Timeout in nanoseconds (default: 0)\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, + int policy, int prio) +{ + int ret; + struct sched_param schedp; + pthread_attr_t attr; + + pthread_attr_init(&attr); + memset(&schedp, 0, sizeof(schedp)); + + ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + if (ret) { + error("pthread_attr_setinheritsched\n", ret); + return -1; + } + + ret = pthread_attr_setschedpolicy(&attr, policy); + if (ret) { + error("pthread_attr_setschedpolicy\n", ret); + return -1; + } + + schedp.sched_priority = prio; + ret = pthread_attr_setschedparam(&attr, &schedp); + if (ret) { + error("pthread_attr_setschedparam\n", ret); + return -1; + } + + ret = pthread_create(pth, &attr, func, arg); + if (ret) { + error("pthread_create\n", ret); + return -1; + } + return 0; +} + + +void *waiterfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + futex_t old_val; + + info("Waiter %ld: running\n", args->id); + /* Each thread sleeps for a different amount of time + * This is to avoid races, because we don't lock the + * external mutex here */ + usleep(1000 * (long)args->id); + + old_val = f1; + atomic_inc(&waiters_blocked); + info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n", + &f1, f1, &f2); + args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout, + FUTEX_PRIVATE_FLAG); + + info("waiter %ld woke with %d %s\n", args->id, args->ret, + args->ret < 0 ? strerror(errno) : ""); + atomic_inc(&waiters_woken); + if (args->ret < 0) { + if (args->timeout && errno == ETIMEDOUT) + args->ret = 0; + else { + args->ret = RET_ERROR; + error("futex_wait_requeue_pi\n", errno); + } + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + info("Waiter %ld: exiting with %d\n", args->id, args->ret); + pthread_exit((void *)&args->ret); +} + +void *broadcast_wakerfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + int nr_requeue = INT_MAX; + int task_count = 0; + futex_t old_val; + int nr_wake = 1; + int i = 0; + + info("Waker: waiting for waiters to block\n"); + while (waiters_blocked.val < THREAD_MAX) + usleep(1000); + usleep(1000); + + info("Waker: Calling broadcast\n"); + if (args->lock) { + info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2); + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + continue_requeue: + old_val = f1; + args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue, + FUTEX_PRIVATE_FLAG); + if (args->ret < 0) { + args->ret = RET_ERROR; + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + } else if (++i < MAX_WAKE_ITERS) { + task_count += args->ret; + if (task_count < THREAD_MAX - waiters_woken.val) + goto continue_requeue; + } else { + error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n", + 0, MAX_WAKE_ITERS, task_count, THREAD_MAX); + args->ret = RET_ERROR; + } + + futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); + + if (args->lock) + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + if (args->ret > 0) + args->ret = task_count; + + info("Waker: exiting with %d\n", args->ret); + pthread_exit((void *)&args->ret); +} + +void *signal_wakerfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + unsigned int old_val; + int nr_requeue = 0; + int task_count = 0; + int nr_wake = 1; + int i = 0; + + info("Waker: waiting for waiters to block\n"); + while (waiters_blocked.val < THREAD_MAX) + usleep(1000); + usleep(1000); + + while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) { + info("task_count: %d, waiters_woken: %d\n", + task_count, waiters_woken.val); + if (args->lock) { + info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", + f2, &f2); + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + info("Waker: Calling signal\n"); + /* cond_signal */ + old_val = f1; + args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, + nr_wake, nr_requeue, + FUTEX_PRIVATE_FLAG); + if (args->ret < 0) + args->ret = -errno; + info("futex: %x\n", f2); + if (args->lock) { + info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", + f2, &f2); + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + } + info("futex: %x\n", f2); + if (args->ret < 0) { + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + args->ret = RET_ERROR; + break; + } + + task_count += args->ret; + usleep(SIGNAL_PERIOD_US); + i++; + /* we have to loop at least THREAD_MAX times */ + if (i > MAX_WAKE_ITERS + THREAD_MAX) { + error("max signaling iterations (%d) reached, giving up on pending waiters.\n", + 0, MAX_WAKE_ITERS + THREAD_MAX); + args->ret = RET_ERROR; + break; + } + } + + futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); + + if (args->ret >= 0) + args->ret = task_count; + + info("Waker: exiting with %d\n", args->ret); + info("Waker: waiters_woken: %d\n", waiters_woken.val); + pthread_exit((void *)&args->ret); +} + +void *third_party_blocker(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + int ret2 = 0; + + args->ret = futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + if (args->ret) + goto out; + args->ret = futex_wait(&wake_complete, wake_complete, NULL, + FUTEX_PRIVATE_FLAG); + ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + out: + if (args->ret || ret2) { + error("third_party_blocker() futex error", 0); + args->ret = RET_ERROR; + } + + pthread_exit((void *)&args->ret); +} + +int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) +{ + void *(*wakerfn)(void *) = signal_wakerfn; + struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER; + struct thread_arg waker_arg = THREAD_ARG_INITIALIZER; + pthread_t waiter[THREAD_MAX], waker, blocker; + struct timespec ts, *tsp = NULL; + struct thread_arg args[THREAD_MAX]; + int *waiter_ret; + int i, ret = RET_PASS; + + if (timeout_ns) { + time_t secs; + + info("timeout_ns = %ld\n", timeout_ns); + ret = clock_gettime(CLOCK_MONOTONIC, &ts); + secs = (ts.tv_nsec + timeout_ns) / 1000000000; + ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000; + ts.tv_sec += secs; + info("ts.tv_sec = %ld\n", ts.tv_sec); + info("ts.tv_nsec = %ld\n", ts.tv_nsec); + tsp = &ts; + } + + if (broadcast) + wakerfn = broadcast_wakerfn; + + if (third_party_owner) { + if (create_rt_thread(&blocker, third_party_blocker, + (void *)&blocker_arg, SCHED_FIFO, 1)) { + error("Creating third party blocker thread failed\n", + errno); + ret = RET_ERROR; + goto out; + } + } + + atomic_set(&waiters_woken, 0); + for (i = 0; i < THREAD_MAX; i++) { + args[i].id = i; + args[i].timeout = tsp; + info("Starting thread %d\n", i); + if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i], + SCHED_FIFO, 1)) { + error("Creating waiting thread failed\n", errno); + ret = RET_ERROR; + goto out; + } + } + waker_arg.lock = lock; + if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg, + SCHED_FIFO, 1)) { + error("Creating waker thread failed\n", errno); + ret = RET_ERROR; + goto out; + } + + /* Wait for threads to finish */ + /* Store the first error or failure encountered in waiter_ret */ + waiter_ret = &args[0].ret; + for (i = 0; i < THREAD_MAX; i++) + pthread_join(waiter[i], + *waiter_ret ? NULL : (void **)&waiter_ret); + + if (third_party_owner) + pthread_join(blocker, NULL); + pthread_join(waker, NULL); + +out: + if (!ret) { + if (*waiter_ret) + ret = *waiter_ret; + else if (waker_arg.ret < 0) + ret = waker_arg.ret; + else if (blocker_arg.ret) + ret = blocker_arg.ret; + } + + return ret; +} + +int main(int argc, char *argv[]) +{ + int c, ret; + + while ((c = getopt(argc, argv, "bchlot:v:")) != -1) { + switch (c) { + case 'b': + broadcast = 1; + break; + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'l': + locked = 1; + break; + case 'o': + owner = 1; + locked = 0; + break; + case 't': + timeout_ns = atoi(optarg); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test requeue functionality\n", basename(argv[0])); + printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", + broadcast, locked, owner, timeout_ns); + + /* + * FIXME: unit_test is obsolete now that we parse options and the + * various style of runs are done by run.sh - simplify the code and move + * unit_test into main() + */ + ret = unit_test(broadcast, locked, owner, timeout_ns); + + print_result(ret); + return ret; +} diff --git a/kernel/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/kernel/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c new file mode 100644 index 000000000..d5e4f2c4d --- /dev/null +++ b/kernel/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c @@ -0,0 +1,135 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * 1. Block a thread using FUTEX_WAIT + * 2. Attempt to use FUTEX_CMP_REQUEUE_PI on the futex from 1. + * 3. The kernel must detect the mismatch and return -EINVAL. + * + * AUTHOR + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "futextest.h" +#include "logging.h" + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +int child_ret = 0; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *blocking_child(void *arg) +{ + child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG); + if (child_ret < 0) { + child_ret = -errno; + error("futex_wait\n", errno); + } + return (void *)&child_ret; +} + +int main(int argc, char *argv[]) +{ + int ret = RET_PASS; + pthread_t child; + int c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Detect mismatched requeue_pi operations\n", + basename(argv[0])); + + if (pthread_create(&child, NULL, blocking_child, NULL)) { + error("pthread_create\n", errno); + ret = RET_ERROR; + goto out; + } + /* Allow the child to block in the kernel. */ + sleep(1); + + /* + * The kernel should detect the waiter did not setup the + * q->requeue_pi_key and return -EINVAL. If it does not, + * it likely gave the lock to the child, which is now hung + * in the kernel. + */ + ret = futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG); + if (ret < 0) { + if (errno == EINVAL) { + /* + * The kernel correctly detected the mismatched + * requeue_pi target and aborted. Wake the child with + * FUTEX_WAKE. + */ + ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG); + if (ret == 1) { + ret = RET_PASS; + } else if (ret < 0) { + error("futex_wake\n", errno); + ret = RET_ERROR; + } else { + error("futex_wake did not wake the child\n", 0); + ret = RET_ERROR; + } + } else { + error("futex_cmp_requeue_pi\n", errno); + ret = RET_ERROR; + } + } else if (ret > 0) { + fail("futex_cmp_requeue_pi failed to detect the mismatch\n"); + ret = RET_FAIL; + } else { + error("futex_cmp_requeue_pi found no waiters\n", 0); + ret = RET_ERROR; + } + + pthread_join(child, NULL); + + if (!ret) + ret = child_ret; + + out: + /* If the kernel crashes, we shouldn't return at all. */ + print_result(ret); + return ret; +} diff --git a/kernel/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/kernel/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c new file mode 100644 index 000000000..3d7dc6afc --- /dev/null +++ b/kernel/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c @@ -0,0 +1,223 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2006-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * This test exercises the futex_wait_requeue_pi() signal handling both + * before and after the requeue. The first should be restarted by the + * kernel. The latter should return EWOULDBLOCK to the waiter. + * + * AUTHORS + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2008-May-5: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <limits.h> +#include <pthread.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "atomic.h" +#include "futextest.h" +#include "logging.h" + +#define DELAY_US 100 + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +atomic_t requeued = ATOMIC_INITIALIZER; + +int waiter_ret = 0; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, + int policy, int prio) +{ + struct sched_param schedp; + pthread_attr_t attr; + int ret; + + pthread_attr_init(&attr); + memset(&schedp, 0, sizeof(schedp)); + + ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + if (ret) { + error("pthread_attr_setinheritsched\n", ret); + return -1; + } + + ret = pthread_attr_setschedpolicy(&attr, policy); + if (ret) { + error("pthread_attr_setschedpolicy\n", ret); + return -1; + } + + schedp.sched_priority = prio; + ret = pthread_attr_setschedparam(&attr, &schedp); + if (ret) { + error("pthread_attr_setschedparam\n", ret); + return -1; + } + + ret = pthread_create(pth, &attr, func, arg); + if (ret) { + error("pthread_create\n", ret); + return -1; + } + return 0; +} + +void handle_signal(int signo) +{ + info("signal received %s requeue\n", + requeued.val ? "after" : "prior to"); +} + +void *waiterfn(void *arg) +{ + unsigned int old_val; + int res; + + waiter_ret = RET_PASS; + + info("Waiter running\n"); + info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + old_val = f1; + res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL, + FUTEX_PRIVATE_FLAG); + if (!requeued.val || errno != EWOULDBLOCK) { + fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n", + res, strerror(errno)); + info("w2:futex: %x\n", f2); + if (!res) + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + waiter_ret = RET_FAIL; + } + + info("Waiter exiting with %d\n", waiter_ret); + pthread_exit(NULL); +} + + +int main(int argc, char *argv[]) +{ + unsigned int old_val; + struct sigaction sa; + pthread_t waiter; + int c, res, ret = RET_PASS; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test signal handling during requeue_pi\n", + basename(argv[0])); + printf("\tArguments: <none>\n"); + + sa.sa_handler = handle_signal; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + if (sigaction(SIGUSR1, &sa, NULL)) { + error("sigaction\n", errno); + exit(1); + } + + info("m1:f2: %x\n", f2); + info("Creating waiter\n"); + res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1); + if (res) { + error("Creating waiting thread failed", res); + ret = RET_ERROR; + goto out; + } + + info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + info("m2:f2: %x\n", f2); + futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG); + info("m3:f2: %x\n", f2); + + while (1) { + /* + * signal the waiter before requeue, waiter should automatically + * restart futex_wait_requeue_pi() in the kernel. Wait for the + * waiter to block on f1 again. + */ + info("Issuing SIGUSR1 to waiter\n"); + pthread_kill(waiter, SIGUSR1); + usleep(DELAY_US); + + info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n"); + old_val = f1; + res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0, + FUTEX_PRIVATE_FLAG); + /* + * If res is non-zero, we either requeued the waiter or hit an + * error, break out and handle it. If it is zero, then the + * signal may have hit before the the waiter was blocked on f1. + * Try again. + */ + if (res > 0) { + atomic_set(&requeued, 1); + break; + } else if (res < 0) { + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + ret = RET_ERROR; + break; + } + } + info("m4:f2: %x\n", f2); + + /* + * Signal the waiter after requeue, waiter should return from + * futex_wait_requeue_pi() with EWOULDBLOCK. Join the thread here so the + * futex_unlock_pi() can't happen before the signal wakeup is detected + * in the kernel. + */ + info("Issuing SIGUSR1 to waiter\n"); + pthread_kill(waiter, SIGUSR1); + info("Waiting for waiter to return\n"); + pthread_join(waiter, NULL); + + info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2); + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + info("m5:f2: %x\n", f2); + + out: + if (ret == RET_PASS && waiter_ret) + ret = waiter_ret; + + print_result(ret); + return ret; +} diff --git a/kernel/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/kernel/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c new file mode 100644 index 000000000..5f687f247 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c @@ -0,0 +1,125 @@ +/****************************************************************************** + * + * Copyright FUJITSU LIMITED 2010 + * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Internally, Futex has two handling mode, anon and file. The private file + * mapping is special. At first it behave as file, but after write anything + * it behave as anon. This test is intent to test such case. + * + * AUTHOR + * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + * HISTORY + * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + *****************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <syscall.h> +#include <unistd.h> +#include <errno.h> +#include <linux/futex.h> +#include <pthread.h> +#include <libgen.h> +#include <signal.h> + +#include "logging.h" +#include "futextest.h" + +#define PAGE_SZ 4096 + +char pad[PAGE_SZ] = {1}; +futex_t val = 1; +char pad2[PAGE_SZ] = {1}; + +#define WAKE_WAIT_US 3000000 +struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0}; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *thr_futex_wait(void *arg) +{ + int ret; + + info("futex wait\n"); + ret = futex_wait(&val, 1, &wait_timeout, 0); + if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) { + error("futex error.\n", errno); + print_result(RET_ERROR); + exit(RET_ERROR); + } + + if (ret && errno == ETIMEDOUT) + fail("waiter timedout\n"); + + info("futex_wait: ret = %d, errno = %d\n", ret, errno); + + return NULL; +} + +int main(int argc, char **argv) +{ + pthread_t thr; + int ret = RET_PASS; + int res; + int c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n", + basename(argv[0])); + + ret = pthread_create(&thr, NULL, thr_futex_wait, NULL); + if (ret < 0) { + fprintf(stderr, "pthread_create error\n"); + ret = RET_ERROR; + goto out; + } + + info("wait a while\n"); + usleep(WAKE_WAIT_US); + val = 2; + res = futex_wake(&val, 1, 0); + info("futex_wake %d\n", res); + if (res != 1) { + fail("FUTEX_WAKE didn't find the waiting thread.\n"); + ret = RET_FAIL; + } + + info("join\n"); + pthread_join(thr, NULL); + + out: + print_result(ret); + return ret; +} diff --git a/kernel/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/kernel/tools/testing/selftests/futex/functional/futex_wait_timeout.c new file mode 100644 index 000000000..ab428ca89 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -0,0 +1,86 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Block on a futex and wait for timeout. + * + * AUTHOR + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "futextest.h" +#include "logging.h" + +static long timeout_ns = 100000; /* 100us default timeout */ + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -t N Timeout in nanoseconds (default: 100,000)\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + futex_t f1 = FUTEX_INITIALIZER; + struct timespec to; + int res, ret = RET_PASS; + int c; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 't': + timeout_ns = atoi(optarg); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Block on a futex and wait for timeout\n", + basename(argv[0])); + printf("\tArguments: timeout=%ldns\n", timeout_ns); + + /* initialize timeout */ + to.tv_sec = 0; + to.tv_nsec = timeout_ns; + + info("Calling futex_wait on f1: %u @ %p\n", f1, &f1); + res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG); + if (!res || errno != ETIMEDOUT) { + fail("futex_wait returned %d\n", ret < 0 ? errno : ret); + ret = RET_FAIL; + } + + print_result(ret); + return ret; +} diff --git a/kernel/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/kernel/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c new file mode 100644 index 000000000..fe7aee968 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c @@ -0,0 +1,124 @@ +/****************************************************************************** + * + * Copyright FUJITSU LIMITED 2010 + * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Wait on uninitialized heap. It shold be zero and FUTEX_WAIT should + * return immediately. This test is intent to test zero page handling in + * futex. + * + * AUTHOR + * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + * HISTORY + * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + *****************************************************************************/ + +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <errno.h> +#include <linux/futex.h> +#include <libgen.h> + +#include "logging.h" +#include "futextest.h" + +#define WAIT_US 5000000 + +static int child_blocked = 1; +static int child_ret; +void *buf; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *wait_thread(void *arg) +{ + int res; + + child_ret = RET_PASS; + res = futex_wait(buf, 1, NULL, 0); + child_blocked = 0; + + if (res != 0 && errno != EWOULDBLOCK) { + error("futex failure\n", errno); + child_ret = RET_ERROR; + } + pthread_exit(NULL); +} + +int main(int argc, char **argv) +{ + int c, ret = RET_PASS; + long page_size; + pthread_t thr; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + page_size = sysconf(_SC_PAGESIZE); + + buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + if (buf == (void *)-1) { + error("mmap\n", errno); + exit(1); + } + + printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n", + basename(argv[0])); + + + ret = pthread_create(&thr, NULL, wait_thread, NULL); + if (ret) { + error("pthread_create\n", errno); + ret = RET_ERROR; + goto out; + } + + info("waiting %dus for child to return\n", WAIT_US); + usleep(WAIT_US); + + ret = child_ret; + if (child_blocked) { + fail("child blocked in kernel\n"); + ret = RET_FAIL; + } + + out: + print_result(ret); + return ret; +} diff --git a/kernel/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/kernel/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c new file mode 100644 index 000000000..b6b027448 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -0,0 +1,79 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Test if FUTEX_WAIT op returns -EWOULDBLOCK if the futex value differs + * from the expected one. + * + * AUTHOR + * Gowrishankar <gowrishankar.m@in.ibm.com> + * + * HISTORY + * 2009-Nov-14: Initial version by Gowrishankar <gowrishankar.m@in.ibm.com> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "futextest.h" +#include "logging.h" + +#define timeout_ns 100000 + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + futex_t f1 = FUTEX_INITIALIZER; + int res, ret = RET_PASS; + int c; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test the unexpected futex value in FUTEX_WAIT\n", + basename(argv[0])); + + info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG); + if (!res || errno != EWOULDBLOCK) { + fail("futex_wait returned: %d %s\n", + res ? errno : res, res ? strerror(errno) : ""); + ret = RET_FAIL; + } + + print_result(ret); + return ret; +} diff --git a/kernel/tools/testing/selftests/futex/functional/run.sh b/kernel/tools/testing/selftests/futex/functional/run.sh new file mode 100755 index 000000000..e87dbe2a0 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/functional/run.sh @@ -0,0 +1,79 @@ +#!/bin/sh + +############################################################################### +# +# Copyright © International Business Machines Corp., 2009 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# DESCRIPTION +# Run tests in the current directory. +# +# AUTHOR +# Darren Hart <dvhart@linux.intel.com> +# +# HISTORY +# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com> +# 2010-Jan-6: Add futex_wait_uninitialized_heap and futex_wait_private_mapped_file +# by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> +# +############################################################################### + +# Test for a color capable console +if [ -z "$USE_COLOR" ]; then + tput setf 7 + if [ $? -eq 0 ]; then + USE_COLOR=1 + tput sgr0 + fi +fi +if [ "$USE_COLOR" -eq 1 ]; then + COLOR="-c" +fi + + +echo +# requeue pi testing +# without timeouts +./futex_requeue_pi $COLOR +./futex_requeue_pi $COLOR -b +./futex_requeue_pi $COLOR -b -l +./futex_requeue_pi $COLOR -b -o +./futex_requeue_pi $COLOR -l +./futex_requeue_pi $COLOR -o +# with timeouts +./futex_requeue_pi $COLOR -b -l -t 5000 +./futex_requeue_pi $COLOR -l -t 5000 +./futex_requeue_pi $COLOR -b -l -t 500000 +./futex_requeue_pi $COLOR -l -t 500000 +./futex_requeue_pi $COLOR -b -t 5000 +./futex_requeue_pi $COLOR -t 5000 +./futex_requeue_pi $COLOR -b -t 500000 +./futex_requeue_pi $COLOR -t 500000 +./futex_requeue_pi $COLOR -b -o -t 5000 +./futex_requeue_pi $COLOR -l -t 5000 +./futex_requeue_pi $COLOR -b -o -t 500000 +./futex_requeue_pi $COLOR -l -t 500000 +# with long timeout +./futex_requeue_pi $COLOR -b -l -t 2000000000 +./futex_requeue_pi $COLOR -l -t 2000000000 + + +echo +./futex_requeue_pi_mismatched_ops $COLOR + +echo +./futex_requeue_pi_signal_restart $COLOR + +echo +./futex_wait_timeout $COLOR + +echo +./futex_wait_wouldblock $COLOR + +echo +./futex_wait_uninitialized_heap $COLOR +./futex_wait_private_mapped_file $COLOR diff --git a/kernel/tools/testing/selftests/futex/include/atomic.h b/kernel/tools/testing/selftests/futex/include/atomic.h new file mode 100644 index 000000000..f861da3e3 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/include/atomic.h @@ -0,0 +1,83 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * GCC atomic builtin wrappers + * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html + * + * AUTHOR + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2009-Nov-17: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#ifndef _ATOMIC_H +#define _ATOMIC_H + +typedef struct { + volatile int val; +} atomic_t; + +#define ATOMIC_INITIALIZER { 0 } + +/** + * atomic_cmpxchg() - Atomic compare and exchange + * @uaddr: The address of the futex to be modified + * @oldval: The expected value of the futex + * @newval: The new value to try and assign the futex + * + * Return the old value of addr->val. + */ +static inline int +atomic_cmpxchg(atomic_t *addr, int oldval, int newval) +{ + return __sync_val_compare_and_swap(&addr->val, oldval, newval); +} + +/** + * atomic_inc() - Atomic incrememnt + * @addr: Address of the variable to increment + * + * Return the new value of addr->val. + */ +static inline int +atomic_inc(atomic_t *addr) +{ + return __sync_add_and_fetch(&addr->val, 1); +} + +/** + * atomic_dec() - Atomic decrement + * @addr: Address of the variable to decrement + * + * Return the new value of addr-val. + */ +static inline int +atomic_dec(atomic_t *addr) +{ + return __sync_sub_and_fetch(&addr->val, 1); +} + +/** + * atomic_set() - Atomic set + * @addr: Address of the variable to set + * @newval: New value for the atomic_t + * + * Return the new value of addr->val. + */ +static inline int +atomic_set(atomic_t *addr, int newval) +{ + addr->val = newval; + return newval; +} + +#endif diff --git a/kernel/tools/testing/selftests/futex/include/futextest.h b/kernel/tools/testing/selftests/futex/include/futextest.h new file mode 100644 index 000000000..b98c3aba7 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/include/futextest.h @@ -0,0 +1,266 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Glibc independent futex library for testing kernel functionality. + * + * AUTHOR + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#ifndef _FUTEXTEST_H +#define _FUTEXTEST_H + +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <linux/futex.h> + +typedef volatile u_int32_t futex_t; +#define FUTEX_INITIALIZER 0 + +/* Define the newer op codes if the system header file is not up to date. */ +#ifndef FUTEX_WAIT_BITSET +#define FUTEX_WAIT_BITSET 9 +#endif +#ifndef FUTEX_WAKE_BITSET +#define FUTEX_WAKE_BITSET 10 +#endif +#ifndef FUTEX_WAIT_REQUEUE_PI +#define FUTEX_WAIT_REQUEUE_PI 11 +#endif +#ifndef FUTEX_CMP_REQUEUE_PI +#define FUTEX_CMP_REQUEUE_PI 12 +#endif +#ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_REQUEUE_PI_PRIVATE +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#endif + +/** + * futex() - SYS_futex syscall wrapper + * @uaddr: address of first futex + * @op: futex op code + * @val: typically expected value of uaddr, but varies by op + * @timeout: typically an absolute struct timespec (except where noted + * otherwise). Overloaded by some ops + * @uaddr2: address of second futex for some ops\ + * @val3: varies by op + * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG + * + * futex() is used by all the following futex op wrappers. It can also be + * used for misuse and abuse testing. Generally, the specific op wrappers + * should be used instead. It is a macro instead of an static inline function as + * some of the types over overloaded (timeout is used for nr_requeue for + * example). + * + * These argument descriptions are the defaults for all + * like-named arguments in the following wrappers except where noted below. + */ +#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \ + syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3) + +/** + * futex_wait() - block on uaddr with optional timeout + * @timeout: relative timeout + */ +static inline int +futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); +} + +/** + * futex_wake() - wake one or more tasks blocked on uaddr + * @nr_wake: wake up to this many tasks + */ +static inline int +futex_wake(futex_t *uaddr, int nr_wake, int opflags) +{ + return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); +} + +/** + * futex_wait_bitset() - block on uaddr with bitset + * @bitset: bitset to be used with futex_wake_bitset + */ +static inline int +futex_wait_bitset(futex_t *uaddr, futex_t val, struct timespec *timeout, + u_int32_t bitset, int opflags) +{ + return futex(uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, bitset, + opflags); +} + +/** + * futex_wake_bitset() - wake one or more tasks blocked on uaddr with bitset + * @bitset: bitset to compare with that used in futex_wait_bitset + */ +static inline int +futex_wake_bitset(futex_t *uaddr, int nr_wake, u_int32_t bitset, int opflags) +{ + return futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset, + opflags); +} + +/** + * futex_lock_pi() - block on uaddr as a PI mutex + * @detect: whether (1) or not (0) to perform deadlock detection + */ +static inline int +futex_lock_pi(futex_t *uaddr, struct timespec *timeout, int detect, + int opflags) +{ + return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags); +} + +/** + * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter + */ +static inline int +futex_unlock_pi(futex_t *uaddr, int opflags) +{ + return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); +} + +/** + * futex_wake_op() - FIXME: COME UP WITH A GOOD ONE LINE DESCRIPTION + */ +static inline int +futex_wake_op(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_wake2, + int wake_op, int opflags) +{ + return futex(uaddr, FUTEX_WAKE_OP, nr_wake, nr_wake2, uaddr2, wake_op, + opflags); +} + +/** + * futex_requeue() - requeue without expected value comparison, deprecated + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + * + * Due to its inherently racy implementation, futex_requeue() is deprecated in + * favor of futex_cmp_requeue(). + */ +static inline int +futex_requeue(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_requeue, + int opflags) +{ + return futex(uaddr, FUTEX_REQUEUE, nr_wake, nr_requeue, uaddr2, 0, + opflags); +} + +/** + * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + */ +static inline int +futex_cmp_requeue(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + +/** + * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2 + * @uaddr: non-PI futex source + * @uaddr2: PI futex target + * + * This is the first half of the requeue_pi mechanism. It shall always be + * paired with futex_cmp_requeue_pi(). + */ +static inline int +futex_wait_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, + struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, + opflags); +} + +/** + * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 (PI aware) + * @uaddr: non-PI futex source + * @uaddr2: PI futex target + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + */ +static inline int +futex_cmp_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + +/** + * futex_cmpxchg() - atomic compare and exchange + * @uaddr: The address of the futex to be modified + * @oldval: The expected value of the futex + * @newval: The new value to try and assign the futex + * + * Implement cmpxchg using gcc atomic builtins. + * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html + * + * Return the old futex value. + */ +static inline u_int32_t +futex_cmpxchg(futex_t *uaddr, u_int32_t oldval, u_int32_t newval) +{ + return __sync_val_compare_and_swap(uaddr, oldval, newval); +} + +/** + * futex_dec() - atomic decrement of the futex value + * @uaddr: The address of the futex to be modified + * + * Return the new futex value. + */ +static inline u_int32_t +futex_dec(futex_t *uaddr) +{ + return __sync_sub_and_fetch(uaddr, 1); +} + +/** + * futex_inc() - atomic increment of the futex value + * @uaddr: the address of the futex to be modified + * + * Return the new futex value. + */ +static inline u_int32_t +futex_inc(futex_t *uaddr) +{ + return __sync_add_and_fetch(uaddr, 1); +} + +/** + * futex_set() - atomic decrement of the futex value + * @uaddr: the address of the futex to be modified + * @newval: New value for the atomic_t + * + * Return the new futex value. + */ +static inline u_int32_t +futex_set(futex_t *uaddr, u_int32_t newval) +{ + *uaddr = newval; + return newval; +} + +#endif diff --git a/kernel/tools/testing/selftests/futex/include/logging.h b/kernel/tools/testing/selftests/futex/include/logging.h new file mode 100644 index 000000000..014aa0119 --- /dev/null +++ b/kernel/tools/testing/selftests/futex/include/logging.h @@ -0,0 +1,153 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Glibc independent futex library for testing kernel functionality. + * + * AUTHOR + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#ifndef _LOGGING_H +#define _LOGGING_H + +#include <string.h> +#include <unistd.h> +#include <linux/futex.h> +#include "kselftest.h" + +/* + * Define PASS, ERROR, and FAIL strings with and without color escape + * sequences, default to no color. + */ +#define ESC 0x1B, '[' +#define BRIGHT '1' +#define GREEN '3', '2' +#define YELLOW '3', '3' +#define RED '3', '1' +#define ESCEND 'm' +#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND +#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND +#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND +#define RESET_COLOR ESC, '0', 'm' +static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S', + RESET_COLOR, 0}; +static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R', + RESET_COLOR, 0}; +static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L', + RESET_COLOR, 0}; +static const char INFO_NORMAL[] = " INFO"; +static const char PASS_NORMAL[] = " PASS"; +static const char ERROR_NORMAL[] = "ERROR"; +static const char FAIL_NORMAL[] = " FAIL"; +const char *INFO = INFO_NORMAL; +const char *PASS = PASS_NORMAL; +const char *ERROR = ERROR_NORMAL; +const char *FAIL = FAIL_NORMAL; + +/* Verbosity setting for INFO messages */ +#define VQUIET 0 +#define VCRITICAL 1 +#define VINFO 2 +#define VMAX VINFO +int _verbose = VCRITICAL; + +/* Functional test return codes */ +#define RET_PASS 0 +#define RET_ERROR -1 +#define RET_FAIL -2 + +/** + * log_color() - Use colored output for PASS, ERROR, and FAIL strings + * @use_color: use color (1) or not (0) + */ +void log_color(int use_color) +{ + if (use_color) { + PASS = PASS_COLOR; + ERROR = ERROR_COLOR; + FAIL = FAIL_COLOR; + } else { + PASS = PASS_NORMAL; + ERROR = ERROR_NORMAL; + FAIL = FAIL_NORMAL; + } +} + +/** + * log_verbosity() - Set verbosity of test output + * @verbose: Enable (1) verbose output or not (0) + * + * Currently setting verbose=1 will enable INFO messages and 0 will disable + * them. FAIL and ERROR messages are always displayed. + */ +void log_verbosity(int level) +{ + if (level > VMAX) + level = VMAX; + else if (level < 0) + level = 0; + _verbose = level; +} + +/** + * print_result() - Print standard PASS | ERROR | FAIL results + * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL + * + * print_result() is primarily intended for functional tests. + */ +void print_result(int ret) +{ + const char *result = "Unknown return code"; + + switch (ret) { + case RET_PASS: + ksft_inc_pass_cnt(); + result = PASS; + break; + case RET_ERROR: + result = ERROR; + break; + case RET_FAIL: + ksft_inc_fail_cnt(); + result = FAIL; + break; + } + printf("Result: %s\n", result); +} + +/* log level macros */ +#define info(message, vargs...) \ +do { \ + if (_verbose >= VINFO) \ + fprintf(stderr, "\t%s: "message, INFO, ##vargs); \ +} while (0) + +#define error(message, err, args...) \ +do { \ + if (_verbose >= VCRITICAL) {\ + if (err) \ + fprintf(stderr, "\t%s: %s: "message, \ + ERROR, strerror(err), ##args); \ + else \ + fprintf(stderr, "\t%s: "message, ERROR, ##args); \ + } \ +} while (0) + +#define fail(message, args...) \ +do { \ + if (_verbose >= VCRITICAL) \ + fprintf(stderr, "\t%s: "message, FAIL, ##args); \ +} while (0) + +#endif diff --git a/kernel/tools/testing/selftests/futex/run.sh b/kernel/tools/testing/selftests/futex/run.sh new file mode 100755 index 000000000..4126312ad --- /dev/null +++ b/kernel/tools/testing/selftests/futex/run.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +############################################################################### +# +# Copyright © International Business Machines Corp., 2009 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# DESCRIPTION +# Run all tests under the functional, performance, and stress directories. +# Format and summarize the results. +# +# AUTHOR +# Darren Hart <dvhart@linux.intel.com> +# +# HISTORY +# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com> +# +############################################################################### + +# Test for a color capable shell and pass the result to the subdir scripts +USE_COLOR=0 +tput setf 7 +if [ $? -eq 0 ]; then + USE_COLOR=1 + tput sgr0 +fi +export USE_COLOR + +(cd functional; ./run.sh) diff --git a/kernel/tools/testing/selftests/kselftest.h b/kernel/tools/testing/selftests/kselftest.h index 572c88881..ef1c80d67 100644 --- a/kernel/tools/testing/selftests/kselftest.h +++ b/kernel/tools/testing/selftests/kselftest.h @@ -13,6 +13,13 @@ #include <stdlib.h> #include <unistd.h> +/* define kselftest exit codes */ +#define KSFT_PASS 0 +#define KSFT_FAIL 1 +#define KSFT_XFAIL 2 +#define KSFT_XPASS 3 +#define KSFT_SKIP 4 + /* counters */ struct ksft_count { unsigned int ksft_pass; @@ -40,23 +47,23 @@ static inline void ksft_print_cnts(void) static inline int ksft_exit_pass(void) { - exit(0); + exit(KSFT_PASS); } static inline int ksft_exit_fail(void) { - exit(1); + exit(KSFT_FAIL); } static inline int ksft_exit_xfail(void) { - exit(2); + exit(KSFT_XFAIL); } static inline int ksft_exit_xpass(void) { - exit(3); + exit(KSFT_XPASS); } static inline int ksft_exit_skip(void) { - exit(4); + exit(KSFT_SKIP); } #endif /* __KSELFTEST_H */ diff --git a/kernel/tools/testing/selftests/lib.mk b/kernel/tools/testing/selftests/lib.mk index 2194155ae..50a93f5f1 100644 --- a/kernel/tools/testing/selftests/lib.mk +++ b/kernel/tools/testing/selftests/lib.mk @@ -12,8 +12,11 @@ run_tests: all $(RUN_TESTS) define INSTALL_RULE - mkdir -p $(INSTALL_PATH) - install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) + @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ + mkdir -p ${INSTALL_PATH}; \ + echo "rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ + rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ + fi endef install: all diff --git a/kernel/tools/testing/selftests/lib/Makefile b/kernel/tools/testing/selftests/lib/Makefile new file mode 100644 index 000000000..47147b968 --- /dev/null +++ b/kernel/tools/testing/selftests/lib/Makefile @@ -0,0 +1,8 @@ +# Makefile for lib/ function selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +TEST_PROGS := printf.sh + +include ../lib.mk diff --git a/kernel/tools/testing/selftests/lib/printf.sh b/kernel/tools/testing/selftests/lib/printf.sh new file mode 100644 index 000000000..4fdc70fe6 --- /dev/null +++ b/kernel/tools/testing/selftests/lib/printf.sh @@ -0,0 +1,10 @@ +#!/bin/sh +# Runs printf infrastructure using test_printf kernel module + +if /sbin/modprobe -q test_printf; then + /sbin/modprobe -q -r test_printf + echo "printf: ok" +else + echo "printf: [FAIL]" + exit 1 +fi diff --git a/kernel/tools/testing/selftests/membarrier/.gitignore b/kernel/tools/testing/selftests/membarrier/.gitignore new file mode 100644 index 000000000..020c44f49 --- /dev/null +++ b/kernel/tools/testing/selftests/membarrier/.gitignore @@ -0,0 +1 @@ +membarrier_test diff --git a/kernel/tools/testing/selftests/membarrier/Makefile b/kernel/tools/testing/selftests/membarrier/Makefile new file mode 100644 index 000000000..a1a970858 --- /dev/null +++ b/kernel/tools/testing/selftests/membarrier/Makefile @@ -0,0 +1,10 @@ +CFLAGS += -g -I../../../../usr/include/ + +TEST_PROGS := membarrier_test + +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + $(RM) $(TEST_PROGS) diff --git a/kernel/tools/testing/selftests/membarrier/membarrier_test.c b/kernel/tools/testing/selftests/membarrier/membarrier_test.c new file mode 100644 index 000000000..535f0fef4 --- /dev/null +++ b/kernel/tools/testing/selftests/membarrier/membarrier_test.c @@ -0,0 +1,118 @@ +#define _GNU_SOURCE +#include <linux/membarrier.h> +#include <syscall.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> + +#include "../kselftest.h" + +enum test_membarrier_status { + TEST_MEMBARRIER_PASS = 0, + TEST_MEMBARRIER_FAIL, + TEST_MEMBARRIER_SKIP, +}; + +static int sys_membarrier(int cmd, int flags) +{ + return syscall(__NR_membarrier, cmd, flags); +} + +static enum test_membarrier_status test_membarrier_cmd_fail(void) +{ + int cmd = -1, flags = 0; + + if (sys_membarrier(cmd, flags) != -1) { + printf("membarrier: Wrong command should fail but passed.\n"); + return TEST_MEMBARRIER_FAIL; + } + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier_flags_fail(void) +{ + int cmd = MEMBARRIER_CMD_QUERY, flags = 1; + + if (sys_membarrier(cmd, flags) != -1) { + printf("membarrier: Wrong flags should fail but passed.\n"); + return TEST_MEMBARRIER_FAIL; + } + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier_success(void) +{ + int cmd = MEMBARRIER_CMD_SHARED, flags = 0; + + if (sys_membarrier(cmd, flags) != 0) { + printf("membarrier: Executing MEMBARRIER_CMD_SHARED failed. %s.\n", + strerror(errno)); + return TEST_MEMBARRIER_FAIL; + } + + printf("membarrier: MEMBARRIER_CMD_SHARED success.\n"); + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier(void) +{ + enum test_membarrier_status status; + + status = test_membarrier_cmd_fail(); + if (status) + return status; + status = test_membarrier_flags_fail(); + if (status) + return status; + status = test_membarrier_success(); + if (status) + return status; + return TEST_MEMBARRIER_PASS; +} + +static enum test_membarrier_status test_membarrier_query(void) +{ + int flags = 0, ret; + + printf("membarrier MEMBARRIER_CMD_QUERY "); + ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags); + if (ret < 0) { + printf("failed. %s.\n", strerror(errno)); + switch (errno) { + case ENOSYS: + /* + * It is valid to build a kernel with + * CONFIG_MEMBARRIER=n. However, this skips the tests. + */ + return TEST_MEMBARRIER_SKIP; + case EINVAL: + default: + return TEST_MEMBARRIER_FAIL; + } + } + if (!(ret & MEMBARRIER_CMD_SHARED)) { + printf("command MEMBARRIER_CMD_SHARED is not supported.\n"); + return TEST_MEMBARRIER_FAIL; + } + printf("syscall available.\n"); + return TEST_MEMBARRIER_PASS; +} + +int main(int argc, char **argv) +{ + switch (test_membarrier_query()) { + case TEST_MEMBARRIER_FAIL: + return ksft_exit_fail(); + case TEST_MEMBARRIER_SKIP: + return ksft_exit_skip(); + } + switch (test_membarrier()) { + case TEST_MEMBARRIER_FAIL: + return ksft_exit_fail(); + case TEST_MEMBARRIER_SKIP: + return ksft_exit_skip(); + } + + printf("membarrier: tests done!\n"); + return ksft_exit_pass(); +} diff --git a/kernel/tools/testing/selftests/memfd/Makefile b/kernel/tools/testing/selftests/memfd/Makefile index 3e7eb7972..fd396ac81 100644 --- a/kernel/tools/testing/selftests/memfd/Makefile +++ b/kernel/tools/testing/selftests/memfd/Makefile @@ -4,16 +4,16 @@ CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../include/ CFLAGS += -I../../../../usr/include/ -all: - $(CC) $(CFLAGS) memfd_test.c -o memfd_test - TEST_PROGS := memfd_test +all: $(TEST_PROGS) + include ../lib.mk -build_fuse: - $(CC) $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt - $(CC) $(CFLAGS) fuse_test.c -o fuse_test +build_fuse: fuse_mnt fuse_test + +fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) +fuse_mnt: LDFLAGS += $(shell pkg-config fuse --libs) run_fuse: build_fuse @./run_fuse_test.sh || echo "fuse_test: [FAIL]" diff --git a/kernel/tools/testing/selftests/memfd/memfd_test.c b/kernel/tools/testing/selftests/memfd/memfd_test.c index 0b9eafb7a..26546892c 100644 --- a/kernel/tools/testing/selftests/memfd/memfd_test.c +++ b/kernel/tools/testing/selftests/memfd/memfd_test.c @@ -15,10 +15,11 @@ #include <sys/mman.h> #include <sys/stat.h> #include <sys/syscall.h> +#include <sys/wait.h> #include <unistd.h> #define MFD_DEF_SIZE 8192 -#define STACK_SIZE 65535 +#define STACK_SIZE 65536 static int sys_memfd_create(const char *name, unsigned int flags) diff --git a/kernel/tools/testing/selftests/memfd/run_fuse_test.sh b/kernel/tools/testing/selftests/memfd/run_fuse_test.sh index 69b930e1e..69b930e1e 100644..100755 --- a/kernel/tools/testing/selftests/memfd/run_fuse_test.sh +++ b/kernel/tools/testing/selftests/memfd/run_fuse_test.sh diff --git a/kernel/tools/testing/selftests/mount/Makefile b/kernel/tools/testing/selftests/mount/Makefile index 95580a973..5e35c9c50 100644 --- a/kernel/tools/testing/selftests/mount/Makefile +++ b/kernel/tools/testing/selftests/mount/Makefile @@ -9,7 +9,12 @@ unprivileged-remount-test: unprivileged-remount-test.c include ../lib.mk TEST_PROGS := unprivileged-remount-test -override RUN_TESTS := if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi +override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \ + then \ + ./unprivileged-remount-test ; \ + else \ + echo "WARN: No /proc/self/uid_map exist, test skipped." ; \ + fi override EMIT_TESTS := echo "$(RUN_TESTS)" clean: diff --git a/kernel/tools/testing/selftests/mqueue/Makefile b/kernel/tools/testing/selftests/mqueue/Makefile index 0e3b41eb8..eebac29ac 100644 --- a/kernel/tools/testing/selftests/mqueue/Makefile +++ b/kernel/tools/testing/selftests/mqueue/Makefile @@ -1,8 +1,8 @@ -CFLAGS = -O2 +CFLAGS += -O2 +LDLIBS = -lrt -lpthread -lpopt +TEST_PROGS := mq_open_tests mq_perf_tests -all: - $(CC) $(CFLAGS) mq_open_tests.c -o mq_open_tests -lrt - $(CC) $(CFLAGS) -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt +all: $(TEST_PROGS) include ../lib.mk @@ -11,8 +11,6 @@ override define RUN_TESTS @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" endef -TEST_PROGS := mq_open_tests mq_perf_tests - override define EMIT_TESTS echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\"" echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\"" diff --git a/kernel/tools/testing/selftests/mqueue/mq_open_tests.c b/kernel/tools/testing/selftests/mqueue/mq_open_tests.c index 9c1a5d359..e0a74bd20 100644 --- a/kernel/tools/testing/selftests/mqueue/mq_open_tests.c +++ b/kernel/tools/testing/selftests/mqueue/mq_open_tests.c @@ -31,6 +31,7 @@ #include <sys/resource.h> #include <sys/stat.h> #include <mqueue.h> +#include <error.h> static char *usage = "Usage:\n" diff --git a/kernel/tools/testing/selftests/mqueue/mq_perf_tests.c b/kernel/tools/testing/selftests/mqueue/mq_perf_tests.c index 8519e9ee9..8188f72de 100644 --- a/kernel/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/kernel/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -37,6 +37,7 @@ #include <sys/stat.h> #include <mqueue.h> #include <popt.h> +#include <error.h> static char *usage = "Usage:\n" diff --git a/kernel/tools/testing/selftests/net/psock_fanout.c b/kernel/tools/testing/selftests/net/psock_fanout.c index 6f6733331..412459369 100644 --- a/kernel/tools/testing/selftests/net/psock_fanout.c +++ b/kernel/tools/testing/selftests/net/psock_fanout.c @@ -19,6 +19,8 @@ * - PACKET_FANOUT_LB * - PACKET_FANOUT_CPU * - PACKET_FANOUT_ROLLOVER + * - PACKET_FANOUT_CBPF + * - PACKET_FANOUT_EBPF * * Todo: * - functionality: PACKET_FANOUT_FLAG_DEFRAG @@ -44,7 +46,9 @@ #include <arpa/inet.h> #include <errno.h> #include <fcntl.h> +#include <linux/unistd.h> /* for __NR_bpf */ #include <linux/filter.h> +#include <linux/bpf.h> #include <linux/if_packet.h> #include <net/ethernet.h> #include <netinet/ip.h> @@ -91,6 +95,51 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets) return fd; } +static void sock_fanout_set_ebpf(int fd) +{ + const int len_off = __builtin_offsetof(struct __sk_buff, len); + struct bpf_insn prog[] = { + { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 }, + { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 }, + { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN }, + { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 }, + { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 }, + { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR }, + { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 }, + { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 }, + { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } + }; + char log_buf[512]; + union bpf_attr attr; + int pfd; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = (unsigned long) prog; + attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); + attr.license = (unsigned long) "GPL"; + attr.log_buf = (unsigned long) log_buf, + attr.log_size = sizeof(log_buf), + attr.log_level = 1, + + pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + if (pfd < 0) { + perror("bpf"); + fprintf(stderr, "bpf verifier:\n%s\n", log_buf); + exit(1); + } + + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) { + perror("fanout data ebpf"); + exit(1); + } + + if (close(pfd)) { + perror("close ebpf"); + exit(1); + } +} + static char *sock_fanout_open_ring(int fd) { struct tpacket_req req = { @@ -115,8 +164,8 @@ static char *sock_fanout_open_ring(int fd) ring = mmap(0, req.tp_block_size * req.tp_block_nr, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (!ring) { - fprintf(stderr, "packetsock ring mmap\n"); + if (ring == MAP_FAILED) { + perror("packetsock ring mmap"); exit(1); } @@ -209,6 +258,7 @@ static int test_datapath(uint16_t typeflags, int port_off, { const int expect0[] = { 0, 0 }; char *rings[2]; + uint8_t type = typeflags & 0xFF; int fds[2], fds_udp[2][2], ret; fprintf(stderr, "test: datapath 0x%hx\n", typeflags); @@ -219,6 +269,11 @@ static int test_datapath(uint16_t typeflags, int port_off, fprintf(stderr, "ERROR: failed open\n"); exit(1); } + if (type == PACKET_FANOUT_CBPF) + sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA); + else if (type == PACKET_FANOUT_EBPF) + sock_fanout_set_ebpf(fds[0]); + rings[0] = sock_fanout_open_ring(fds[0]); rings[1] = sock_fanout_open_ring(fds[1]); pair_udp_open(fds_udp[0], PORT_BASE); @@ -227,11 +282,11 @@ static int test_datapath(uint16_t typeflags, int port_off, /* Send data, but not enough to overflow a queue */ pair_udp_send(fds_udp[0], 15); - pair_udp_send(fds_udp[1], 5); + pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1); ret = sock_fanout_read(fds, rings, expect1); /* Send more data, overflow the queue */ - pair_udp_send(fds_udp[0], 15); + pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1); /* TODO: ensure consistent order between expect1 and expect2 */ ret |= sock_fanout_read(fds, rings, expect2); @@ -272,9 +327,10 @@ int main(int argc, char **argv) const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } }; const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } }; const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } }; - const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } }; + const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } }; const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; + const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } }; int port_off = 2, tries = 5, ret; test_control_single(); @@ -296,6 +352,11 @@ int main(int argc, char **argv) ret |= test_datapath(PACKET_FANOUT_ROLLOVER, port_off, expect_rb[0], expect_rb[1]); + ret |= test_datapath(PACKET_FANOUT_CBPF, + port_off, expect_bpf[0], expect_bpf[1]); + ret |= test_datapath(PACKET_FANOUT_EBPF, + port_off, expect_bpf[0], expect_bpf[1]); + set_cpuaffinity(0); ret |= test_datapath(PACKET_FANOUT_CPU, port_off, expect_cpu0[0], expect_cpu0[1]); diff --git a/kernel/tools/testing/selftests/net/psock_lib.h b/kernel/tools/testing/selftests/net/psock_lib.h index 37da54ac8..24bc7ec1b 100644 --- a/kernel/tools/testing/selftests/net/psock_lib.h +++ b/kernel/tools/testing/selftests/net/psock_lib.h @@ -30,6 +30,7 @@ #define DATA_LEN 100 #define DATA_CHAR 'a' +#define DATA_CHAR_1 'b' #define PORT_BASE 8000 @@ -37,29 +38,36 @@ # define __maybe_unused __attribute__ ((__unused__)) #endif -static __maybe_unused void pair_udp_setfilter(int fd) +static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum) { struct sock_filter bpf_filter[] = { { 0x80, 0, 0, 0x00000000 }, /* LD pktlen */ - { 0x35, 0, 5, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ + { 0x35, 0, 4, DATA_LEN }, /* JGE DATA_LEN [f goto nomatch]*/ { 0x30, 0, 0, 0x00000050 }, /* LD ip[80] */ - { 0x15, 0, 3, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ - { 0x30, 0, 0, 0x00000051 }, /* LD ip[81] */ - { 0x15, 0, 1, DATA_CHAR }, /* JEQ DATA_CHAR [f goto nomatch]*/ + { 0x15, 1, 0, DATA_CHAR }, /* JEQ DATA_CHAR [t goto match]*/ + { 0x15, 0, 1, DATA_CHAR_1}, /* JEQ DATA_CHAR_1 [t goto match]*/ { 0x06, 0, 0, 0x00000060 }, /* RET match */ { 0x06, 0, 0, 0x00000000 }, /* RET no match */ }; struct sock_fprog bpf_prog; + if (lvl == SOL_PACKET && optnum == PACKET_FANOUT_DATA) + bpf_filter[5].code = 0x16; /* RET A */ + bpf_prog.filter = bpf_filter; bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); - if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, + if (setsockopt(fd, lvl, optnum, &bpf_prog, sizeof(bpf_prog))) { perror("setsockopt SO_ATTACH_FILTER"); exit(1); } } +static __maybe_unused void pair_udp_setfilter(int fd) +{ + sock_setfilter(fd, SOL_SOCKET, SO_ATTACH_FILTER); +} + static __maybe_unused void pair_udp_open(int fds[], uint16_t port) { struct sockaddr_in saddr, daddr; @@ -96,11 +104,11 @@ static __maybe_unused void pair_udp_open(int fds[], uint16_t port) } } -static __maybe_unused void pair_udp_send(int fds[], int num) +static __maybe_unused void pair_udp_send_char(int fds[], int num, char payload) { char buf[DATA_LEN], rbuf[DATA_LEN]; - memset(buf, DATA_CHAR, sizeof(buf)); + memset(buf, payload, sizeof(buf)); while (num--) { /* Should really handle EINTR and EAGAIN */ if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) { @@ -118,6 +126,11 @@ static __maybe_unused void pair_udp_send(int fds[], int num) } } +static __maybe_unused void pair_udp_send(int fds[], int num) +{ + return pair_udp_send_char(fds, num, DATA_CHAR); +} + static __maybe_unused void pair_udp_close(int fds[]) { close(fds[0]); diff --git a/kernel/tools/testing/selftests/powerpc/Makefile b/kernel/tools/testing/selftests/powerpc/Makefile index 5ad042345..0c2706bda 100644 --- a/kernel/tools/testing/selftests/powerpc/Makefile +++ b/kernel/tools/testing/selftests/powerpc/Makefile @@ -12,7 +12,17 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR export CFLAGS -SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian +SUB_DIRS = benchmarks \ + copyloops \ + dscr \ + mm \ + pmu \ + primitives \ + stringloops \ + switch_endian \ + syscalls \ + tm \ + vphn endif diff --git a/kernel/tools/testing/selftests/powerpc/benchmarks/.gitignore b/kernel/tools/testing/selftests/powerpc/benchmarks/.gitignore new file mode 100644 index 000000000..b4709ea58 --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/benchmarks/.gitignore @@ -0,0 +1 @@ +gettimeofday diff --git a/kernel/tools/testing/selftests/powerpc/benchmarks/Makefile b/kernel/tools/testing/selftests/powerpc/benchmarks/Makefile new file mode 100644 index 000000000..5fa487020 --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -0,0 +1,12 @@ +TEST_PROGS := gettimeofday + +CFLAGS += -O2 + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/kernel/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c b/kernel/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c new file mode 100644 index 000000000..3af3c21e8 --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c @@ -0,0 +1,31 @@ +/* + * Copyright 2015, Anton Blanchard, IBM Corp. + * Licensed under GPLv2. + */ + +#include <sys/time.h> +#include <stdio.h> + +#include "utils.h" + +static int test_gettimeofday(void) +{ + int i; + + struct timeval tv_start, tv_end; + + gettimeofday(&tv_start, NULL); + + for(i = 0; i < 100000000; i++) { + gettimeofday(&tv_end, NULL); + } + + printf("time = %.6f\n", tv_end.tv_sec - tv_start.tv_sec + (tv_end.tv_usec - tv_start.tv_usec) * 1e-6); + + return 0; +} + +int main(void) +{ + return test_harness(test_gettimeofday, "gettimeofday"); +} diff --git a/kernel/tools/testing/selftests/powerpc/dscr/.gitignore b/kernel/tools/testing/selftests/powerpc/dscr/.gitignore new file mode 100644 index 000000000..b585c6c15 --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/dscr/.gitignore @@ -0,0 +1,7 @@ +dscr_default_test +dscr_explicit_test +dscr_inherit_exec_test +dscr_inherit_test +dscr_sysfs_test +dscr_sysfs_thread_test +dscr_user_test diff --git a/kernel/tools/testing/selftests/powerpc/dscr/Makefile b/kernel/tools/testing/selftests/powerpc/dscr/Makefile new file mode 100644 index 000000000..49327ee84 --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/dscr/Makefile @@ -0,0 +1,14 @@ +TEST_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ + dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \ + dscr_sysfs_thread_test + +dscr_default_test: LDLIBS += -lpthread + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/kernel/tools/testing/selftests/powerpc/dscr/dscr.h b/kernel/tools/testing/selftests/powerpc/dscr/dscr.h new file mode 100644 index 000000000..a36af1b2c --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/dscr/dscr.h @@ -0,0 +1,127 @@ +/* + * POWER Data Stream Control Register (DSCR) + * + * This header file contains helper functions and macros + * required for all the DSCR related test cases. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#ifndef _SELFTESTS_POWERPC_DSCR_DSCR_H +#define _SELFTESTS_POWERPC_DSCR_DSCR_H + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <dirent.h> +#include <pthread.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> + +#include "utils.h" + +#define SPRN_DSCR 0x11 /* Privilege state SPR */ +#define SPRN_DSCR_USR 0x03 /* Problem state SPR */ +#define THREADS 100 /* Max threads */ +#define COUNT 100 /* Max iterations */ +#define DSCR_MAX 16 /* Max DSCR value */ +#define LEN_MAX 100 /* Max name length */ + +#define DSCR_DEFAULT "/sys/devices/system/cpu/dscr_default" +#define CPU_PATH "/sys/devices/system/cpu/" + +#define rmb() asm volatile("lwsync":::"memory") +#define wmb() asm volatile("lwsync":::"memory") + +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + +/* Prilvilege state DSCR access */ +inline unsigned long get_dscr(void) +{ + unsigned long ret; + + asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR)); + + return ret; +} + +inline void set_dscr(unsigned long val) +{ + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} + +/* Problem state DSCR access */ +inline unsigned long get_dscr_usr(void) +{ + unsigned long ret; + + asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR_USR)); + + return ret; +} + +inline void set_dscr_usr(unsigned long val) +{ + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_USR)); +} + +/* Default DSCR access */ +unsigned long get_default_dscr(void) +{ + int fd = -1, ret; + char buf[16]; + unsigned long val; + + if (fd == -1) { + fd = open(DSCR_DEFAULT, O_RDONLY); + if (fd == -1) { + perror("open() failed"); + exit(1); + } + } + memset(buf, 0, sizeof(buf)); + lseek(fd, 0, SEEK_SET); + ret = read(fd, buf, sizeof(buf)); + if (ret == -1) { + perror("read() failed"); + exit(1); + } + sscanf(buf, "%lx", &val); + close(fd); + return val; +} + +void set_default_dscr(unsigned long val) +{ + int fd = -1, ret; + char buf[16]; + + if (fd == -1) { + fd = open(DSCR_DEFAULT, O_RDWR); + if (fd == -1) { + perror("open() failed"); + exit(1); + } + } + sprintf(buf, "%lx\n", val); + ret = write(fd, buf, strlen(buf)); + if (ret == -1) { + perror("write() failed"); + exit(1); + } + close(fd); +} + +double uniform_deviate(int seed) +{ + return seed * (1.0 / (RAND_MAX + 1.0)); +} +#endif /* _SELFTESTS_POWERPC_DSCR_DSCR_H */ diff --git a/kernel/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/kernel/tools/testing/selftests/powerpc/dscr/dscr_default_test.c new file mode 100644 index 000000000..df17c3bab --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/dscr/dscr_default_test.c @@ -0,0 +1,127 @@ +/* + * POWER Data Stream Control Register (DSCR) default test + * + * This test modifies the system wide default DSCR through + * it's sysfs interface and then verifies that all threads + * see the correct changed DSCR value immediately. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +static unsigned long dscr; /* System DSCR default */ +static unsigned long sequence; +static unsigned long result[THREADS]; + +static void *do_test(void *in) +{ + unsigned long thread = (unsigned long)in; + unsigned long i; + + for (i = 0; i < COUNT; i++) { + unsigned long d, cur_dscr, cur_dscr_usr; + unsigned long s1, s2; + + s1 = ACCESS_ONCE(sequence); + if (s1 & 1) + continue; + rmb(); + + d = dscr; + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + + rmb(); + s2 = sequence; + + if (s1 != s2) + continue; + + if (cur_dscr != d) { + fprintf(stderr, "thread %ld kernel DSCR should be %ld " + "but is %ld\n", thread, d, cur_dscr); + result[thread] = 1; + pthread_exit(&result[thread]); + } + + if (cur_dscr_usr != d) { + fprintf(stderr, "thread %ld user DSCR should be %ld " + "but is %ld\n", thread, d, cur_dscr_usr); + result[thread] = 1; + pthread_exit(&result[thread]); + } + } + result[thread] = 0; + pthread_exit(&result[thread]); +} + +int dscr_default(void) +{ + pthread_t threads[THREADS]; + unsigned long i, *status[THREADS]; + unsigned long orig_dscr_default; + + orig_dscr_default = get_default_dscr(); + + /* Initial DSCR default */ + dscr = 1; + set_default_dscr(dscr); + + /* Spawn all testing threads */ + for (i = 0; i < THREADS; i++) { + if (pthread_create(&threads[i], NULL, do_test, (void *)i)) { + perror("pthread_create() failed"); + goto fail; + } + } + + srand(getpid()); + + /* Keep changing the DSCR default */ + for (i = 0; i < COUNT; i++) { + double ret = uniform_deviate(rand()); + + if (ret < 0.0001) { + sequence++; + wmb(); + + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + set_default_dscr(dscr); + + wmb(); + sequence++; + } + } + + /* Individual testing thread exit status */ + for (i = 0; i < THREADS; i++) { + if (pthread_join(threads[i], (void **)&(status[i]))) { + perror("pthread_join() failed"); + goto fail; + } + + if (*status[i]) { + printf("%ldth thread failed to join with %ld status\n", + i, *status[i]); + goto fail; + } + } + set_default_dscr(orig_dscr_default); + return 0; +fail: + set_default_dscr(orig_dscr_default); + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_default, "dscr_default_test"); +} diff --git a/kernel/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/kernel/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c new file mode 100644 index 000000000..ad9c3ec26 --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c @@ -0,0 +1,71 @@ +/* + * POWER Data Stream Control Register (DSCR) explicit test + * + * This test modifies the DSCR value using mtspr instruction and + * verifies the change with mfspr instruction. It uses both the + * privilege state SPR and the problem state SPR for this purpose. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +int dscr_explicit(void) +{ + unsigned long i, dscr = 0; + + srand(getpid()); + set_dscr(dscr); + + for (i = 0; i < COUNT; i++) { + unsigned long cur_dscr, cur_dscr_usr; + double ret = uniform_deviate(rand()); + + if (ret < 0.001) { + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + set_dscr(dscr); + } + + cur_dscr = get_dscr(); + if (cur_dscr != dscr) { + fprintf(stderr, "Kernel DSCR should be %ld but " + "is %ld\n", dscr, cur_dscr); + return 1; + } + + ret = uniform_deviate(rand()); + if (ret < 0.001) { + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + set_dscr_usr(dscr); + } + + cur_dscr_usr = get_dscr_usr(); + if (cur_dscr_usr != dscr) { + fprintf(stderr, "User DSCR should be %ld but " + "is %ld\n", dscr, cur_dscr_usr); + return 1; + } + } + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_explicit, "dscr_explicit_test"); +} diff --git a/kernel/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/kernel/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c new file mode 100644 index 000000000..8265504de --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c @@ -0,0 +1,117 @@ +/* + * POWER Data Stream Control Register (DSCR) fork exec test + * + * This testcase modifies the DSCR using mtspr, forks & execs and + * verifies that the child is using the changed DSCR using mfspr. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +static char prog[LEN_MAX]; + +static void do_exec(unsigned long parent_dscr) +{ + unsigned long cur_dscr, cur_dscr_usr; + + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + + if (cur_dscr != parent_dscr) { + fprintf(stderr, "Parent DSCR %ld was not inherited " + "over exec (kernel value)\n", parent_dscr); + exit(1); + } + + if (cur_dscr_usr != parent_dscr) { + fprintf(stderr, "Parent DSCR %ld was not inherited " + "over exec (user value)\n", parent_dscr); + exit(1); + } + exit(0); +} + +int dscr_inherit_exec(void) +{ + unsigned long i, dscr = 0; + pid_t pid; + + for (i = 0; i < COUNT; i++) { + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + if (dscr == get_default_dscr()) + continue; + + if (i % 2 == 0) + set_dscr_usr(dscr); + else + set_dscr(dscr); + + /* + * XXX: Force a context switch out so that DSCR + * current value is copied into the thread struct + * which is required for the child to inherit the + * changed value. + */ + sleep(1); + + pid = fork(); + if (pid == -1) { + perror("fork() failed"); + exit(1); + } else if (pid) { + int status; + + if (waitpid(pid, &status, 0) == -1) { + perror("waitpid() failed"); + exit(1); + } + + if (!WIFEXITED(status)) { + fprintf(stderr, "Child didn't exit cleanly\n"); + exit(1); + } + + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "Child didn't exit cleanly\n"); + return 1; + } + } else { + char dscr_str[16]; + + sprintf(dscr_str, "%ld", dscr); + execlp(prog, prog, "exec", dscr_str, NULL); + exit(1); + } + } + return 0; +} + +int main(int argc, char *argv[]) +{ + if (argc == 3 && !strcmp(argv[1], "exec")) { + unsigned long parent_dscr; + + parent_dscr = atoi(argv[2]); + do_exec(parent_dscr); + } else if (argc != 1) { + fprintf(stderr, "Usage: %s\n", argv[0]); + exit(1); + } + + strncpy(prog, argv[0], strlen(argv[0])); + return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test"); +} diff --git a/kernel/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/kernel/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c new file mode 100644 index 000000000..4e414caf7 --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c @@ -0,0 +1,95 @@ +/* + * POWER Data Stream Control Register (DSCR) fork test + * + * This testcase modifies the DSCR using mtspr, forks and then + * verifies that the child process has the correct changed DSCR + * value using mfspr. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +int dscr_inherit(void) +{ + unsigned long i, dscr = 0; + pid_t pid; + + srand(getpid()); + set_dscr(dscr); + + for (i = 0; i < COUNT; i++) { + unsigned long cur_dscr, cur_dscr_usr; + + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + if (i % 2 == 0) + set_dscr_usr(dscr); + else + set_dscr(dscr); + + /* + * XXX: Force a context switch out so that DSCR + * current value is copied into the thread struct + * which is required for the child to inherit the + * changed value. + */ + sleep(1); + + pid = fork(); + if (pid == -1) { + perror("fork() failed"); + exit(1); + } else if (pid) { + int status; + + if (waitpid(pid, &status, 0) == -1) { + perror("waitpid() failed"); + exit(1); + } + + if (!WIFEXITED(status)) { + fprintf(stderr, "Child didn't exit cleanly\n"); + exit(1); + } + + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "Child didn't exit cleanly\n"); + return 1; + } + } else { + cur_dscr = get_dscr(); + if (cur_dscr != dscr) { + fprintf(stderr, "Kernel DSCR should be %ld " + "but is %ld\n", dscr, cur_dscr); + exit(1); + } + + cur_dscr_usr = get_dscr_usr(); + if (cur_dscr_usr != dscr) { + fprintf(stderr, "User DSCR should be %ld " + "but is %ld\n", dscr, cur_dscr_usr); + exit(1); + } + exit(0); + } + } + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_inherit, "dscr_inherit_test"); +} diff --git a/kernel/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/kernel/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c new file mode 100644 index 000000000..17fb1b43c --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c @@ -0,0 +1,97 @@ +/* + * POWER Data Stream Control Register (DSCR) sysfs interface test + * + * This test updates to system wide DSCR default through the sysfs interface + * and then verifies that all the CPU specific DSCR defaults are updated as + * well verified from their sysfs interfaces. + * + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +static int check_cpu_dscr_default(char *file, unsigned long val) +{ + char buf[10]; + int fd, rc; + + fd = open(file, O_RDWR); + if (fd == -1) { + perror("open() failed"); + return 1; + } + + rc = read(fd, buf, sizeof(buf)); + if (rc == -1) { + perror("read() failed"); + return 1; + } + close(fd); + + buf[rc] = '\0'; + if (strtol(buf, NULL, 16) != val) { + printf("DSCR match failed: %ld (system) %ld (cpu)\n", + val, strtol(buf, NULL, 16)); + return 1; + } + return 0; +} + +static int check_all_cpu_dscr_defaults(unsigned long val) +{ + DIR *sysfs; + struct dirent *dp; + char file[LEN_MAX]; + + sysfs = opendir(CPU_PATH); + if (!sysfs) { + perror("opendir() failed"); + return 1; + } + + while ((dp = readdir(sysfs))) { + if (!(dp->d_type & DT_DIR)) + continue; + if (!strcmp(dp->d_name, "cpuidle")) + continue; + if (!strstr(dp->d_name, "cpu")) + continue; + + sprintf(file, "%s%s/dscr", CPU_PATH, dp->d_name); + if (access(file, F_OK)) + continue; + + if (check_cpu_dscr_default(file, val)) + return 1; + } + closedir(sysfs); + return 0; +} + +int dscr_sysfs(void) +{ + unsigned long orig_dscr_default; + int i, j; + + orig_dscr_default = get_default_dscr(); + for (i = 0; i < COUNT; i++) { + for (j = 0; j < DSCR_MAX; j++) { + set_default_dscr(j); + if (check_all_cpu_dscr_defaults(j)) + goto fail; + } + } + set_default_dscr(orig_dscr_default); + return 0; +fail: + set_default_dscr(orig_dscr_default); + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_sysfs, "dscr_sysfs_test"); +} diff --git a/kernel/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/kernel/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c new file mode 100644 index 000000000..ad97b592e --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c @@ -0,0 +1,80 @@ +/* + * POWER Data Stream Control Register (DSCR) sysfs thread test + * + * This test updates the system wide DSCR default value through + * sysfs interface which should then update all the CPU specific + * DSCR default values which must also be then visible to threads + * executing on individual CPUs on the system. + * + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#define _GNU_SOURCE +#include "dscr.h" + +static int test_thread_dscr(unsigned long val) +{ + unsigned long cur_dscr, cur_dscr_usr; + + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + + if (val != cur_dscr) { + printf("[cpu %d] Kernel DSCR should be %ld but is %ld\n", + sched_getcpu(), val, cur_dscr); + return 1; + } + + if (val != cur_dscr_usr) { + printf("[cpu %d] User DSCR should be %ld but is %ld\n", + sched_getcpu(), val, cur_dscr_usr); + return 1; + } + return 0; +} + +static int check_cpu_dscr_thread(unsigned long val) +{ + cpu_set_t mask; + int cpu; + + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) + continue; + + if (test_thread_dscr(val)) + return 1; + } + return 0; + +} + +int dscr_sysfs_thread(void) +{ + unsigned long orig_dscr_default; + int i, j; + + orig_dscr_default = get_default_dscr(); + for (i = 0; i < COUNT; i++) { + for (j = 0; j < DSCR_MAX; j++) { + set_default_dscr(j); + if (check_cpu_dscr_thread(j)) + goto fail; + } + } + set_default_dscr(orig_dscr_default); + return 0; +fail: + set_default_dscr(orig_dscr_default); + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_sysfs_thread, "dscr_sysfs_thread_test"); +} diff --git a/kernel/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/kernel/tools/testing/selftests/powerpc/dscr/dscr_user_test.c new file mode 100644 index 000000000..77d16b5e7 --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/dscr/dscr_user_test.c @@ -0,0 +1,61 @@ +/* + * POWER Data Stream Control Register (DSCR) SPR test + * + * This test modifies the DSCR value through both the SPR number + * based mtspr instruction and then makes sure that the same is + * reflected through mfspr instruction using either of the SPR + * numbers. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2013, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +static int check_dscr(char *str) +{ + unsigned long cur_dscr, cur_dscr_usr; + + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + if (cur_dscr != cur_dscr_usr) { + printf("%s set, kernel get %lx != user get %lx\n", + str, cur_dscr, cur_dscr_usr); + return 1; + } + return 0; +} + +int dscr_user(void) +{ + int i; + + check_dscr(""); + + for (i = 0; i < COUNT; i++) { + set_dscr(i); + if (check_dscr("kernel")) + return 1; + } + + for (i = 0; i < COUNT; i++) { + set_dscr_usr(i); + if (check_dscr("user")) + return 1; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_user, "dscr_user_test"); +} diff --git a/kernel/tools/testing/selftests/powerpc/mm/Makefile b/kernel/tools/testing/selftests/powerpc/mm/Makefile index 41cc3ed66..ee179e223 100644 --- a/kernel/tools/testing/selftests/powerpc/mm/Makefile +++ b/kernel/tools/testing/selftests/powerpc/mm/Makefile @@ -2,8 +2,9 @@ noarg: $(MAKE) -C ../ TEST_PROGS := hugetlb_vs_thp_test subpage_prot +TEST_FILES := tempfile -all: $(TEST_PROGS) tempfile +all: $(TEST_PROGS) $(TEST_FILES) $(TEST_PROGS): ../harness.c diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c index 66ea765c0..94110b1dc 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c @@ -63,6 +63,8 @@ int back_to_back_ebbs(void) { struct event event; + SKIP_IF(!ebb_is_supported()); + event_init_named(&event, 0x1001e, "cycles"); event_leader_ebb_init(&event); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c index 0f0423dba..ac18cf617 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c @@ -20,6 +20,8 @@ int close_clears_pmcc(void) { struct event event; + SKIP_IF(!ebb_is_supported()); + event_init_named(&event, 0x1001e, "cycles"); event_leader_ebb_init(&event); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c index d3ed64d5d..f0632e7fd 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c @@ -43,6 +43,8 @@ int cpu_event_pinned_vs_ebb(void) int cpu, rc; pid_t pid; + SKIP_IF(!ebb_is_supported()); + cpu = pick_online_cpu(); FAIL_IF(cpu < 0); FAIL_IF(bind_to_cpu(cpu)); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c index 8b972c2aa..33e56a234 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c @@ -41,6 +41,8 @@ int cpu_event_vs_ebb(void) int cpu, rc; pid_t pid; + SKIP_IF(!ebb_is_supported()); + cpu = pick_online_cpu(); FAIL_IF(cpu < 0); FAIL_IF(bind_to_cpu(cpu)); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c index 8590fc1bf..7c57a8d79 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c @@ -16,6 +16,8 @@ int cycles(void) { struct event event; + SKIP_IF(!ebb_is_supported()); + event_init_named(&event, 0x1001e, "cycles"); event_leader_ebb_init(&event); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c index 754b3f200..ecf5ee328 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c @@ -56,6 +56,8 @@ int cycles_with_freeze(void) uint64_t val; bool fc_cleared; + SKIP_IF(!ebb_is_supported()); + event_init_named(&event, 0x1001e, "cycles"); event_leader_ebb_init(&event); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c index d43029b08..c0faba520 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c @@ -26,6 +26,8 @@ int cycles_with_mmcr2(void) int i; bool bad_mmcr2; + SKIP_IF(!ebb_is_supported()); + event_init_named(&event, 0x1001e, "cycles"); event_leader_ebb_init(&event); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb.c index d7a72ce69..9729d9f90 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb.c @@ -13,6 +13,7 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> +#include <linux/auxvec.h> #include "trace.h" #include "reg.h" @@ -319,6 +320,16 @@ void ebb_global_disable(void) mb(); } +bool ebb_is_supported(void) +{ +#ifdef PPC_FEATURE2_EBB + /* EBB requires at least POWER8 */ + return ((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_EBB); +#else + return false; +#endif +} + void event_ebb_init(struct event *e) { e->attr.config |= (1ull << 63); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb.h index e44eee5d9..f87e761f8 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb.h +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb.h @@ -52,6 +52,7 @@ void standard_ebb_callee(void); int ebb_event_enable(struct event *e); void ebb_global_enable(void); void ebb_global_disable(void); +bool ebb_is_supported(void); void ebb_freeze_pmcs(void); void ebb_unfreeze_pmcs(void); void event_ebb_init(struct event *e); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c index c45f94814..1e7b7fe23 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c @@ -47,6 +47,8 @@ int ebb_on_child(void) struct event event; pid_t pid; + SKIP_IF(!ebb_is_supported()); + FAIL_IF(pipe(read_pipe.fds) == -1); FAIL_IF(pipe(write_pipe.fds) == -1); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c index 11acf1d55..a991d2ea8 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c @@ -54,6 +54,8 @@ int ebb_on_willing_child(void) struct event event; pid_t pid; + SKIP_IF(!ebb_is_supported()); + FAIL_IF(pipe(read_pipe.fds) == -1); FAIL_IF(pipe(write_pipe.fds) == -1); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c index be4dd5a4e..af20a2b36 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c @@ -41,6 +41,8 @@ int ebb_vs_cpu_event(void) int cpu, rc; pid_t pid; + SKIP_IF(!ebb_is_supported()); + cpu = pick_online_cpu(); FAIL_IF(cpu < 0); FAIL_IF(bind_to_cpu(cpu)); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c index 7e78153f0..7762ab26e 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c @@ -16,6 +16,8 @@ int event_attributes(void) { struct event event, leader; + SKIP_IF(!ebb_is_supported()); + event_init(&event, 0x1001e); event_leader_ebb_init(&event); /* Expected to succeed */ diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c index 9e7af6e76..167135bd9 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c @@ -44,6 +44,8 @@ int fork_cleanup(void) { pid_t pid; + SKIP_IF(!ebb_is_supported()); + event_init_named(&event, 0x1001e, "cycles"); event_leader_ebb_init(&event); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c index f8190fa29..5da355135 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c @@ -111,6 +111,8 @@ int instruction_count(void) struct event event; uint64_t overhead; + SKIP_IF(!ebb_is_supported()); + event_init_named(&event, 0x400FA, "PM_RUN_INST_CMPL"); event_leader_ebb_init(&event); event.attr.exclude_kernel = 1; diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c index 0c9dd9b2e..eb8acb78b 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c @@ -23,6 +23,8 @@ static int test_body(void) int i, orig_period, max_period; struct event event; + SKIP_IF(!ebb_is_supported()); + /* We use PMC4 to make sure the kernel switches all counters correctly */ event_init_named(&event, 0x40002, "instructions"); event_leader_ebb_init(&event); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c index 67d78af32..6ff8c8ff2 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c @@ -18,6 +18,8 @@ int multi_counter(void) struct event events[6]; int i, group_fd; + SKIP_IF(!ebb_is_supported()); + event_init_named(&events[0], 0x1001C, "PM_CMPLU_STALL_THRD"); event_init_named(&events[1], 0x2D016, "PM_CMPLU_STALL_FXU"); event_init_named(&events[2], 0x30006, "PM_CMPLU_STALL_OTHER_CMPL"); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c index b8dc371f9..037cb6154 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c @@ -79,6 +79,8 @@ int multi_ebb_procs(void) pid_t pids[NR_CHILDREN]; int cpu, rc, i; + SKIP_IF(!ebb_is_supported()); + cpu = pick_online_cpu(); FAIL_IF(cpu < 0); FAIL_IF(bind_to_cpu(cpu)); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c index 2f9bf8edf..8341d7778 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c @@ -19,6 +19,8 @@ static int no_handler_test(void) u64 val; int i; + SKIP_IF(!ebb_is_supported()); + event_init_named(&event, 0x1001e, "cycles"); event_leader_ebb_init(&event); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c index 986500fd2..c5fa64790 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c @@ -58,6 +58,8 @@ static int test_body(void) { struct event event; + SKIP_IF(!ebb_is_supported()); + event_init_named(&event, 0x1001e, "cycles"); event_leader_ebb_init(&event); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c index a503fa70c..c22860ab9 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c @@ -49,6 +49,8 @@ int pmc56_overflow(void) { struct event event; + SKIP_IF(!ebb_is_supported()); + /* Use PMC2 so we set PMCjCE, which enables PMC5/6 */ event_init(&event, 0x2001e); event_leader_ebb_init(&event); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c index 0cae66f65..5b1188f10 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c @@ -18,6 +18,8 @@ int reg_access(void) { uint64_t val, expected; + SKIP_IF(!ebb_is_supported()); + expected = 0x8000000100000000ull; mtspr(SPRN_BESCR, expected); val = mfspr(SPRN_BESCR); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c index d56607e4f..1846f4e84 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c @@ -42,6 +42,8 @@ int task_event_pinned_vs_ebb(void) pid_t pid; int rc; + SKIP_IF(!ebb_is_supported()); + FAIL_IF(pipe(read_pipe.fds) == -1); FAIL_IF(pipe(write_pipe.fds) == -1); diff --git a/kernel/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c b/kernel/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c index eba32196d..e3bc6e92a 100644 --- a/kernel/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c +++ b/kernel/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c @@ -40,6 +40,8 @@ int task_event_vs_ebb(void) pid_t pid; int rc; + SKIP_IF(!ebb_is_supported()); + FAIL_IF(pipe(read_pipe.fds) == -1); FAIL_IF(pipe(write_pipe.fds) == -1); diff --git a/kernel/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/kernel/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c index d1b647509..6cae06117 100644 --- a/kernel/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c +++ b/kernel/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c @@ -25,10 +25,19 @@ #define FIXUP_SECTION ".ex_fixup" +static inline unsigned long __fls(unsigned long x); + #include "word-at-a-time.h" #include "utils.h" +static inline unsigned long __fls(unsigned long x) +{ + int lz; + + asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (x)); + return sizeof(unsigned long) - 1 - lz; +} static int page_size; static char *mem_region; diff --git a/kernel/tools/testing/selftests/powerpc/switch_endian/Makefile b/kernel/tools/testing/selftests/powerpc/switch_endian/Makefile index 081473db2..e21d10674 100644 --- a/kernel/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/kernel/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -1,9 +1,8 @@ -CC := $(CROSS_COMPILE)gcc -PROGS := switch_endian_test +TEST_PROGS := switch_endian_test ASFLAGS += -O2 -Wall -g -nostdlib -m64 -all: $(PROGS) +all: $(TEST_PROGS) switch_endian_test: check-reversed.S @@ -13,12 +12,7 @@ check-reversed.o: check.o check-reversed.S: check-reversed.o hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@ -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o check-reversed.S - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o check-reversed.S diff --git a/kernel/tools/testing/selftests/powerpc/syscalls/.gitignore b/kernel/tools/testing/selftests/powerpc/syscalls/.gitignore new file mode 100644 index 000000000..f0f3fcc9d --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/syscalls/.gitignore @@ -0,0 +1 @@ +ipc_unmuxed diff --git a/kernel/tools/testing/selftests/powerpc/syscalls/Makefile b/kernel/tools/testing/selftests/powerpc/syscalls/Makefile new file mode 100644 index 000000000..b35c7945b --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/syscalls/Makefile @@ -0,0 +1,12 @@ +TEST_PROGS := ipc_unmuxed + +CFLAGS += -I../../../../../usr/include + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/kernel/tools/testing/selftests/powerpc/syscalls/ipc.h b/kernel/tools/testing/selftests/powerpc/syscalls/ipc.h new file mode 100644 index 000000000..fbebc022e --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/syscalls/ipc.h @@ -0,0 +1,47 @@ +#ifdef __NR_semop +DO_TEST(semop, __NR_semop) +#endif + +#ifdef __NR_semget +DO_TEST(semget, __NR_semget) +#endif + +#ifdef __NR_semctl +DO_TEST(semctl, __NR_semctl) +#endif + +#ifdef __NR_semtimedop +DO_TEST(semtimedop, __NR_semtimedop) +#endif + +#ifdef __NR_msgsnd +DO_TEST(msgsnd, __NR_msgsnd) +#endif + +#ifdef __NR_msgrcv +DO_TEST(msgrcv, __NR_msgrcv) +#endif + +#ifdef __NR_msgget +DO_TEST(msgget, __NR_msgget) +#endif + +#ifdef __NR_msgctl +DO_TEST(msgctl, __NR_msgctl) +#endif + +#ifdef __NR_shmat +DO_TEST(shmat, __NR_shmat) +#endif + +#ifdef __NR_shmdt +DO_TEST(shmdt, __NR_shmdt) +#endif + +#ifdef __NR_shmget +DO_TEST(shmget, __NR_shmget) +#endif + +#ifdef __NR_shmctl +DO_TEST(shmctl, __NR_shmctl) +#endif diff --git a/kernel/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c b/kernel/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c new file mode 100644 index 000000000..2ac02706f --- /dev/null +++ b/kernel/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c @@ -0,0 +1,61 @@ +/* + * Copyright 2015, Michael Ellerman, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This test simply tests that certain syscalls are implemented. It doesn't + * actually exercise their logic in any way. + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> + +#include "utils.h" + + +#define DO_TEST(_name, _num) \ +static int test_##_name(void) \ +{ \ + int rc; \ + printf("Testing " #_name); \ + errno = 0; \ + rc = syscall(_num, -1, 0, 0, 0, 0, 0); \ + printf("\treturned %d, errno %d\n", rc, errno); \ + return errno == ENOSYS; \ +} + +#include "ipc.h" +#undef DO_TEST + +static int ipc_unmuxed(void) +{ + int tests_done = 0; + +#define DO_TEST(_name, _num) \ + FAIL_IF(test_##_name()); \ + tests_done++; + +#include "ipc.h" +#undef DO_TEST + + /* + * If we ran no tests then it means none of the syscall numbers were + * defined, possibly because we were built against old headers. But it + * means we didn't really test anything, so instead of passing mark it + * as a skip to give the user a clue. + */ + SKIP_IF(tests_done == 0); + + return 0; +} + +int main(void) +{ + return test_harness(ipc_unmuxed, "ipc_unmuxed"); +} diff --git a/kernel/tools/testing/selftests/powerpc/tm/Makefile b/kernel/tools/testing/selftests/powerpc/tm/Makefile index 6bff955e1..4bea62a31 100644 --- a/kernel/tools/testing/selftests/powerpc/tm/Makefile +++ b/kernel/tools/testing/selftests/powerpc/tm/Makefile @@ -1,11 +1,11 @@ -TEST_PROGS := tm-resched-dscr +TEST_PROGS := tm-resched-dscr tm-syscall all: $(TEST_PROGS) $(TEST_PROGS): ../harness.c tm-syscall: tm-syscall-asm.S -tm-syscall: CFLAGS += -mhtm +tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include include ../../lib.mk diff --git a/kernel/tools/testing/selftests/powerpc/tm/tm-syscall.c b/kernel/tools/testing/selftests/powerpc/tm/tm-syscall.c index 3ed8d4b25..e835bf7ec 100644 --- a/kernel/tools/testing/selftests/powerpc/tm/tm-syscall.c +++ b/kernel/tools/testing/selftests/powerpc/tm/tm-syscall.c @@ -77,12 +77,23 @@ pid_t getppid_tm(bool suspend) exit(-1); } +static inline bool have_htm_nosc(void) +{ +#ifdef PPC_FEATURE2_HTM_NOSC + return ((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM_NOSC); +#else + printf("PPC_FEATURE2_HTM_NOSC not defined, can't check AT_HWCAP2\n"); + return false; +#endif +} + int tm_syscall(void) { unsigned count = 0; struct timeval end, now; - SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM)); + SKIP_IF(!have_htm_nosc()); + setbuf(stdout, NULL); printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION); diff --git a/kernel/tools/testing/selftests/powerpc/vphn/Makefile b/kernel/tools/testing/selftests/powerpc/vphn/Makefile index e539f775f..a485f2e28 100644 --- a/kernel/tools/testing/selftests/powerpc/vphn/Makefile +++ b/kernel/tools/testing/selftests/powerpc/vphn/Makefile @@ -1,15 +1,12 @@ -PROG := test-vphn +TEST_PROGS := test-vphn CFLAGS += -m64 -all: $(PROG) +all: $(TEST_PROGS) -$(PROG): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - ./$(PROG) +include ../../lib.mk clean: - rm -f $(PROG) - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) diff --git a/kernel/tools/testing/selftests/pstore/Makefile b/kernel/tools/testing/selftests/pstore/Makefile new file mode 100644 index 000000000..bd7abe24e --- /dev/null +++ b/kernel/tools/testing/selftests/pstore/Makefile @@ -0,0 +1,15 @@ +# Makefile for pstore selftests. +# Expects pstore backend is registered. + +all: + +TEST_PROGS := pstore_tests pstore_post_reboot_tests +TEST_FILES := common_tests pstore_crash_test + +include ../lib.mk + +run_crash: + @sh pstore_crash_test || { echo "pstore_crash_test: [FAIL]"; exit 1; } + +clean: + rm -rf logs/* *uuid diff --git a/kernel/tools/testing/selftests/pstore/common_tests b/kernel/tools/testing/selftests/pstore/common_tests new file mode 100755 index 000000000..3ea64d7cf --- /dev/null +++ b/kernel/tools/testing/selftests/pstore/common_tests @@ -0,0 +1,83 @@ +#!/bin/sh + +# common_tests - Shell script commonly used by pstore test scripts +# +# Copyright (C) Hitachi Ltd., 2015 +# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com> +# +# Released under the terms of the GPL v2. + +# Utilities +errexit() { # message + echo "Error: $1" 1>&2 + exit 1 +} + +absdir() { # file_path + (cd `dirname $1`; pwd) +} + +show_result() { # result_value + if [ $1 -eq 0 ]; then + prlog "ok" + else + prlog "FAIL" + rc=1 + fi +} + +check_files_exist() { # type of pstorefs file + if [ -e ${1}-${backend}-0 ]; then + prlog "ok" + for f in `ls ${1}-${backend}-*`; do + prlog -e "\t${f}" + done + else + prlog "FAIL" + rc=1 + fi +} + +operate_files() { # tested value, files, operation + if [ $1 -eq 0 ]; then + prlog + for f in $2; do + prlog -ne "\t${f} ... " + # execute operation + $3 $f + show_result $? + done + else + prlog " ... FAIL" + rc=1 + fi +} + +# Parameters +TEST_STRING_PATTERN="Testing pstore: uuid=" +UUID=`cat /proc/sys/kernel/random/uuid` +TOP_DIR=`absdir $0` +LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`_${UUID}/ +REBOOT_FLAG=$TOP_DIR/reboot_flag + +# Preparing logs +LOG_FILE=$LOG_DIR/`basename $0`.log +mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR" +date > $LOG_FILE +prlog() { # messages + /bin/echo "$@" | tee -a $LOG_FILE +} + +# Starting tests +rc=0 +prlog "=== Pstore unit tests (`basename $0`) ===" +prlog "UUID="$UUID + +prlog -n "Checking pstore backend is registered ... " +backend=`cat /sys/module/pstore/parameters/backend` +show_result $? +prlog -e "\tbackend=${backend}" +prlog -e "\tcmdline=`cat /proc/cmdline`" +if [ $rc -ne 0 ]; then + exit 1 +fi diff --git a/kernel/tools/testing/selftests/pstore/pstore_crash_test b/kernel/tools/testing/selftests/pstore/pstore_crash_test new file mode 100755 index 000000000..1a4afe5c1 --- /dev/null +++ b/kernel/tools/testing/selftests/pstore/pstore_crash_test @@ -0,0 +1,30 @@ +#!/bin/sh + +# pstore_crash_test - Pstore test shell script which causes crash and reboot +# +# Copyright (C) Hitachi Ltd., 2015 +# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com> +# +# Released under the terms of the GPL v2. + +# exit if pstore backend is not registered +. ./common_tests + +prlog "Causing kernel crash ..." + +# enable all functions triggered by sysrq +echo 1 > /proc/sys/kernel/sysrq +# setting to reboot in 3 seconds after panic +echo 3 > /proc/sys/kernel/panic + +# save uuid file by different name because next test execution will replace it. +mv $TOP_DIR/uuid $TOP_DIR/prev_uuid + +# create a file as reboot flag +touch $REBOOT_FLAG +sync + +# cause crash +# Note: If you use kdump and want to see kmesg-* files after reboot, you should +# specify 'crash_kexec_post_notifiers' in 1st kernel's cmdline. +echo c > /proc/sysrq-trigger diff --git a/kernel/tools/testing/selftests/pstore/pstore_post_reboot_tests b/kernel/tools/testing/selftests/pstore/pstore_post_reboot_tests new file mode 100755 index 000000000..6ccb154cb --- /dev/null +++ b/kernel/tools/testing/selftests/pstore/pstore_post_reboot_tests @@ -0,0 +1,77 @@ +#!/bin/sh + +# pstore_post_reboot_tests - Check pstore's behavior after crash/reboot +# +# Copyright (C) Hitachi Ltd., 2015 +# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com> +# +# Released under the terms of the GPL v2. + +. ./common_tests + +if [ -e $REBOOT_FLAG ]; then + rm $REBOOT_FLAG +else + prlog "pstore_crash_test has not been executed yet. we skip further tests." + exit 0 +fi + +prlog -n "Mounting pstore filesystem ... " +mount_info=`grep pstore /proc/mounts` +if [ $? -eq 0 ]; then + mount_point=`echo ${mount_info} | cut -d' ' -f2 | head -n1` + prlog "ok" +else + mount none /sys/fs/pstore -t pstore + if [ $? -eq 0 ]; then + mount_point=`grep pstore /proc/mounts | cut -d' ' -f2 | head -n1` + prlog "ok" + else + prlog "FAIL" + exit 1 + fi +fi + +cd ${mount_point} + +prlog -n "Checking dmesg files exist in pstore filesystem ... " +check_files_exist dmesg + +prlog -n "Checking console files exist in pstore filesystem ... " +check_files_exist console + +prlog -n "Checking pmsg files exist in pstore filesystem ... " +check_files_exist pmsg + +prlog -n "Checking dmesg files contain oops end marker" +grep_end_trace() { + grep -q "\---\[ end trace" $1 +} +files=`ls dmesg-${backend}-*` +operate_files $? "$files" grep_end_trace + +prlog -n "Checking console file contains oops end marker ... " +grep -q "\---\[ end trace" console-${backend}-0 +show_result $? + +prlog -n "Checking pmsg file properly keeps the content written before crash ... " +prev_uuid=`cat $TOP_DIR/prev_uuid` +if [ $? -eq 0 ]; then + nr_matched=`grep -c "$TEST_STRING_PATTERN" pmsg-${backend}-0` + if [ $nr_matched -eq 1 ]; then + grep -q "$TEST_STRING_PATTERN"$prev_uuid pmsg-${backend}-0 + show_result $? + else + prlog "FAIL" + rc=1 + fi +else + prlog "FAIL" + rc=1 +fi + +prlog -n "Removing all files in pstore filesystem " +files=`ls *-${backend}-*` +operate_files $? "$files" rm + +exit $rc diff --git a/kernel/tools/testing/selftests/pstore/pstore_tests b/kernel/tools/testing/selftests/pstore/pstore_tests new file mode 100755 index 000000000..f25d2a349 --- /dev/null +++ b/kernel/tools/testing/selftests/pstore/pstore_tests @@ -0,0 +1,30 @@ +#!/bin/sh + +# pstore_tests - Check pstore's behavior before crash/reboot +# +# Copyright (C) Hitachi Ltd., 2015 +# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com> +# +# Released under the terms of the GPL v2. + +. ./common_tests + +prlog -n "Checking pstore console is registered ... " +dmesg | grep -q "console \[pstore" +show_result $? + +prlog -n "Checking /dev/pmsg0 exists ... " +test -e /dev/pmsg0 +show_result $? + +prlog -n "Writing unique string to /dev/pmsg0 ... " +if [ -e "/dev/pmsg0" ]; then + echo "${TEST_STRING_PATTERN}""$UUID" > /dev/pmsg0 + show_result $? + echo "$UUID" > $TOP_DIR/uuid +else + prlog "FAIL" + rc=1 +fi + +exit $rc diff --git a/kernel/tools/testing/selftests/rcutorture/bin/configinit.sh b/kernel/tools/testing/selftests/rcutorture/bin/configinit.sh index 15f1a17ca..3f81a1095 100755 --- a/kernel/tools/testing/selftests/rcutorture/bin/configinit.sh +++ b/kernel/tools/testing/selftests/rcutorture/bin/configinit.sh @@ -66,7 +66,7 @@ make $buildloc $TORTURE_DEFCONFIG > $builddir/Make.defconfig.out 2>&1 mv $builddir/.config $builddir/.config.sav sh $T/upd.sh < $builddir/.config.sav > $builddir/.config cp $builddir/.config $builddir/.config.new -yes '' | make $buildloc oldconfig > $builddir/Make.modconfig.out 2>&1 +yes '' | make $buildloc oldconfig > $builddir/Make.oldconfig.out 2> $builddir/Make.oldconfig.err # verify new config matches specification. configcheck.sh $builddir/.config $c diff --git a/kernel/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/kernel/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 4f5b20f36..d86bdd6b6 100755 --- a/kernel/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/kernel/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -43,6 +43,10 @@ do if test -f "$i/console.log" then configcheck.sh $i/.config $i/ConfigFragment + if test -r $i/Make.oldconfig.err + then + cat $i/Make.oldconfig.err + fi parse-build.sh $i/Make.out $configfile parse-torture.sh $i/console.log $configfile parse-console.sh $i/console.log $configfile diff --git a/kernel/tools/testing/selftests/rcutorture/bin/kvm.sh b/kernel/tools/testing/selftests/rcutorture/bin/kvm.sh index dd2812ceb..f6483609e 100755 --- a/kernel/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/kernel/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -55,7 +55,7 @@ usage () { echo " --bootargs kernel-boot-arguments" echo " --bootimage relative-path-to-kernel-boot-image" echo " --buildonly" - echo " --configs \"config-file list\"" + echo " --configs \"config-file list w/ repeat factor (3*TINY01)\"" echo " --cpus N" echo " --datestamp string" echo " --defconfig string" @@ -75,7 +75,7 @@ usage () { while test $# -gt 0 do case "$1" in - --bootargs) + --bootargs|--bootarg) checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--' TORTURE_BOOTARGS="$2" shift @@ -88,7 +88,7 @@ do --buildonly) TORTURE_BUILDONLY=1 ;; - --configs) + --configs|--config) checkarg --configs "(list of config files)" "$#" "$2" '^[^/]*$' '^--' configs="$2" shift @@ -134,7 +134,7 @@ do --no-initrd) TORTURE_INITRD=""; export TORTURE_INITRD ;; - --qemu-args) + --qemu-args|--qemu-arg) checkarg --qemu-args "-qemu args" $# "$2" '^-' '^error' TORTURE_QEMU_ARG="$2" shift @@ -178,13 +178,26 @@ fi touch $T/cfgcpu for CF in $configs do - if test -f "$CONFIGFRAG/$CF" + case $CF in + [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**) + config_reps=`echo $CF | sed -e 's/\*.*$//'` + CF1=`echo $CF | sed -e 's/^[^*]*\*//'` + ;; + *) + config_reps=1 + CF1=$CF + ;; + esac + if test -f "$CONFIGFRAG/$CF1" then - cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF` - cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF" "$cpu_count"` - echo $CF $cpu_count >> $T/cfgcpu + cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1` + cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"` + for ((cur_rep=0;cur_rep<$config_reps;cur_rep++)) + do + echo $CF1 $cpu_count >> $T/cfgcpu + done else - echo "The --configs file $CF does not exist, terminating." + echo "The --configs file $CF1 does not exist, terminating." exit 1 fi done diff --git a/kernel/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/kernel/tools/testing/selftests/rcutorture/configs/lock/CFLIST index 6910b7370..b9611c523 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/lock/CFLIST +++ b/kernel/tools/testing/selftests/rcutorture/configs/lock/CFLIST @@ -1,4 +1,6 @@ LOCK01 LOCK02 LOCK03 -LOCK04
\ No newline at end of file +LOCK04 +LOCK05 +LOCK06 diff --git a/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK05 b/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK05 new file mode 100644 index 000000000..1d1da1477 --- /dev/null +++ b/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK05 @@ -0,0 +1,6 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot b/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot new file mode 100644 index 000000000..8ac37307c --- /dev/null +++ b/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot @@ -0,0 +1 @@ +locktorture.torture_type=rtmutex_lock diff --git a/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK06 b/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK06 new file mode 100644 index 000000000..1d1da1477 --- /dev/null +++ b/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK06 @@ -0,0 +1,6 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot b/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot new file mode 100644 index 000000000..f92219cd4 --- /dev/null +++ b/kernel/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot @@ -0,0 +1 @@ +locktorture.torture_type=percpu_rwsem_lock diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/kernel/tools/testing/selftests/rcutorture/configs/rcu/CFcommon index 49701218d..f824b4c9d 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/CFcommon +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/CFcommon @@ -1,3 +1,5 @@ CONFIG_RCU_TORTURE_TEST=y CONFIG_PRINTK_TIME=y +CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y +CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N index 9fbb41b9b..1a087c3c8 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N @@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_RCU_EXPERT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P index 4b6f272db..4837430a7 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P @@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot index 238bfe3bd..84a7d51b7 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot @@ -1 +1 @@ -rcutorture.torture_type=srcu +rcutorture.torture_type=srcud diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 index 97f0a0b27..bafe94cbd 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 @@ -5,5 +5,6 @@ CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_RCU=y -CONFIG_TASKS_RCU=y +CONFIG_PROVE_LOCKING=y +#CHECK#CONFIG_PROVE_RCU=y +CONFIG_RCU_EXPERT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 index 696d2ea74..ad2be91e5 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 @@ -2,4 +2,3 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n -CONFIG_TASKS_RCU=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 index 9c60da5b5..c70c51d5d 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 @@ -6,8 +6,8 @@ CONFIG_HIBERNATION=n CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y -CONFIG_TASKS_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y CONFIG_NO_HZ_FULL_ALL=y +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TINY02 index 36e41df3d..f1892e037 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TINY02 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TINY02 @@ -8,7 +8,7 @@ CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y +#CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_PREEMPT_COUNT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot index 0f0802730..6c1a292a6 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot @@ -1,2 +1,3 @@ rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1 +rcutorture.torture_type=rcu_bh diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index f8a10a750..f572b873c 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -13,6 +13,6 @@ CONFIG_MAXSMP=y CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ZERO=y CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index 629122fb8..ef6a22c44 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -14,10 +14,9 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=3 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T index a25de4788..917d2517b 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T @@ -14,10 +14,8 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=3 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE03 index 53f24e0a0..7a17c503b 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE03 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE03 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=8 +CONFIG_NR_CPUS=16 CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y @@ -9,12 +9,11 @@ CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_FANOUT=4 -CONFIG_RCU_FANOUT_LEAF=4 -CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_FANOUT=2 +CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=y CONFIG_RCU_KTHREAD_PRIO=2 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot new file mode 100644 index 000000000..120c0c88d --- /dev/null +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -0,0 +1 @@ +rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30 diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 0f84db35b..39a2c6d7d 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -13,10 +13,9 @@ CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=2 -CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_FANOUT=4 +CONFIG_RCU_FANOUT_LEAF=4 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 212e3bfd2..1257d3227 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -12,11 +12,10 @@ CONFIG_RCU_TRACE=n CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_NONE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_RCU_CPU_STALL_INFO=n +#CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index 7eee63b44..d3e456b74 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -14,10 +14,9 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 -CONFIG_RCU_FANOUT_EXACT=y CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_RCU_CPU_STALL_INFO=n +#CHECK#CONFIG_PROVE_RCU=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_RCU_EXPERT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot index da9a03a39..dd90f28ed 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot @@ -1,3 +1,4 @@ rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1 rcupdate.rcu_self_test_sched=1 +rcutree.rcu_fanout_exact=1 diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index 92a97fa97..3956b4131 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -15,8 +15,7 @@ CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=2 CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08 index 5812027d6..bb9b0c1a2 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=16 +CONFIG_NR_CPUS=8 CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y @@ -13,13 +13,12 @@ CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_EXACT=y CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ALL=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_RCU_CPU_STALL_INFO=n +#CHECK#CONFIG_PROVE_RCU=y CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T index 3eaeccacb..2ad13f0d2 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T @@ -13,11 +13,9 @@ CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_EXACT=y CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ALL=y CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot new file mode 100644 index 000000000..883149b5f --- /dev/null +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot @@ -0,0 +1 @@ +rcutree.rcu_fanout_exact=1 diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot index 2561daf60..fb066dc82 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot @@ -1,3 +1,4 @@ rcutorture.torture_type=sched rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_sched=1 +rcutree.rcu_fanout_exact=1 diff --git a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE09 index 6076b36f6..6710e749d 100644 --- a/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE09 +++ b/kernel/tools/testing/selftests/rcutorture/configs/rcu/TREE09 @@ -13,6 +13,6 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/kernel/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/kernel/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index ec03c883d..657f3a035 100644 --- a/kernel/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/kernel/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -12,13 +12,11 @@ CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.) CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE. CONFIG_NO_HZ_FULL_SYSIDLE -- Do one. CONFIG_PREEMPT -- Do half. (First three and #8.) -CONFIG_PROVE_LOCKING -- Do all but two, covering CONFIG_PROVE_RCU and not. -CONFIG_PROVE_RCU -- Do all but one under CONFIG_PROVE_LOCKING. +CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not. +CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing. -CONFIG_RCU_CPU_STALL_INFO -- Do one. -CONFIG_RCU_FANOUT -- Cover hierarchy as currently, but overlap with others. -CONFIG_RCU_FANOUT_EXACT -- Do one. +CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. CONFIG_RCU_NOCB_CPU -- Do three, see below. @@ -27,28 +25,19 @@ CONFIG_RCU_NOCB_CPU_NONE -- Do one. CONFIG_RCU_NOCB_CPU_ZERO -- Do one. CONFIG_RCU_TRACE -- Do half. CONFIG_SMP -- Need one !SMP for PREEMPT_RCU. +!RCU_EXPERT -- Do a few, but these have to be vanilla configurations. RCU-bh: Do one with PREEMPT and one with !PREEMPT. RCU-sched: Do one with PREEMPT but not BOOST. -Hierarchy: - -TREE01. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=8, CONFIG_RCU_FANOUT_EXACT=n. -TREE02. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=3. -TREE03. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=4, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=4. -TREE04. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=2. -TREE05. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=n - CONFIG_RCU_FANOUT_LEAF=6. -TREE06. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=y - CONFIG_RCU_FANOUT_LEAF=6. -TREE07. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=2. -TREE08. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=y, - CONFIG_RCU_FANOUT_LEAF=2. -TREE09. CONFIG_NR_CPUS=1. +Boot parameters: + +nohz_full - do at least one. +maxcpu -- do at least one. +rcupdate.rcu_self_test_bh -- Do at least one each, offloaded and not. +rcupdate.rcu_self_test_sched -- Do at least one each, offloaded and not. +rcupdate.rcu_self_test -- Do at least one each, offloaded and not. +rcutree.rcu_fanout_exact -- Do at least one. Kconfig Parameters Ignored: diff --git a/kernel/tools/testing/selftests/seccomp/.gitignore b/kernel/tools/testing/selftests/seccomp/.gitignore new file mode 100644 index 000000000..346d83ca8 --- /dev/null +++ b/kernel/tools/testing/selftests/seccomp/.gitignore @@ -0,0 +1 @@ +seccomp_bpf diff --git a/kernel/tools/testing/selftests/seccomp/Makefile b/kernel/tools/testing/selftests/seccomp/Makefile new file mode 100644 index 000000000..8401e87e3 --- /dev/null +++ b/kernel/tools/testing/selftests/seccomp/Makefile @@ -0,0 +1,10 @@ +TEST_PROGS := seccomp_bpf +CFLAGS += -Wl,-no-as-needed -Wall +LDFLAGS += -lpthread + +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + $(RM) $(TEST_PROGS) diff --git a/kernel/tools/testing/selftests/seccomp/seccomp_bpf.c b/kernel/tools/testing/selftests/seccomp/seccomp_bpf.c new file mode 100644 index 000000000..882fe83a3 --- /dev/null +++ b/kernel/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -0,0 +1,2211 @@ +/* + * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by the GPLv2 license. + * + * Test code for seccomp bpf. + */ + +#include <asm/siginfo.h> +#define __have_siginfo_t 1 +#define __have_sigval_t 1 +#define __have_sigevent_t 1 + +#include <errno.h> +#include <linux/filter.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/user.h> +#include <linux/prctl.h> +#include <linux/ptrace.h> +#include <linux/seccomp.h> +#include <pthread.h> +#include <semaphore.h> +#include <signal.h> +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <time.h> +#include <linux/elf.h> +#include <sys/uio.h> +#include <sys/utsname.h> +#include <sys/fcntl.h> +#include <sys/mman.h> +#include <sys/times.h> + +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> + +#include "test_harness.h" + +#ifndef PR_SET_PTRACER +# define PR_SET_PTRACER 0x59616d61 +#endif + +#ifndef PR_SET_NO_NEW_PRIVS +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 +#endif + +#ifndef PR_SECCOMP_EXT +#define PR_SECCOMP_EXT 43 +#endif + +#ifndef SECCOMP_EXT_ACT +#define SECCOMP_EXT_ACT 1 +#endif + +#ifndef SECCOMP_EXT_ACT_TSYNC +#define SECCOMP_EXT_ACT_TSYNC 1 +#endif + +#ifndef SECCOMP_MODE_STRICT +#define SECCOMP_MODE_STRICT 1 +#endif + +#ifndef SECCOMP_MODE_FILTER +#define SECCOMP_MODE_FILTER 2 +#endif + +#ifndef SECCOMP_RET_KILL +#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ + +/* Masks for the return value sections. */ +#define SECCOMP_RET_ACTION 0x7fff0000U +#define SECCOMP_RET_DATA 0x0000ffffU + +struct seccomp_data { + int nr; + __u32 arch; + __u64 instruction_pointer; + __u64 args[6]; +}; +#endif + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) +#elif __BYTE_ORDER == __BIG_ENDIAN +#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32)) +#else +#error "wut? Unknown __BYTE_ORDER?!" +#endif + +#define SIBLING_EXIT_UNKILLED 0xbadbeef +#define SIBLING_EXIT_FAILURE 0xbadface +#define SIBLING_EXIT_NEWPRIVS 0xbadfeed + +TEST(mode_strict_support) +{ + long ret; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SECCOMP"); + } + syscall(__NR_exit, 1); +} + +TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) +{ + long ret; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SECCOMP"); + } + syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, + NULL, NULL, NULL); + EXPECT_FALSE(true) { + TH_LOG("Unreachable!"); + } +} + +/* Note! This doesn't test no new privs behavior */ +TEST(no_new_privs_support) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + EXPECT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } +} + +/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */ +TEST(mode_filter_support) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); + } +} + +TEST(mode_filter_without_nnp) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); + ASSERT_LE(0, ret) { + TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); + } + errno = 0; + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + /* Succeeds with CAP_SYS_ADMIN, fails without */ + /* TODO(wad) check caps not euid */ + if (geteuid()) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EACCES, errno); + } else { + EXPECT_EQ(0, ret); + } +} + +#define MAX_INSNS_PER_PATH 32768 + +TEST(filter_size_limits) +{ + int i; + int count = BPF_MAXINSNS + 1; + struct sock_filter allow[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter *filter; + struct sock_fprog prog = { }; + long ret; + + filter = calloc(count, sizeof(*filter)); + ASSERT_NE(NULL, filter); + + for (i = 0; i < count; i++) + filter[i] = allow[0]; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + prog.filter = filter; + prog.len = count; + + /* Too many filter instructions in a single filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_NE(0, ret) { + TH_LOG("Installing %d insn filter was allowed", prog.len); + } + + /* One less is okay, though. */ + prog.len -= 1; + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Installing %d insn filter wasn't allowed", prog.len); + } +} + +TEST(filter_chain_limits) +{ + int i; + int count = BPF_MAXINSNS; + struct sock_filter allow[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter *filter; + struct sock_fprog prog = { }; + long ret; + + filter = calloc(count, sizeof(*filter)); + ASSERT_NE(NULL, filter); + + for (i = 0; i < count; i++) + filter[i] = allow[0]; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + prog.filter = filter; + prog.len = 1; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + prog.len = count; + + /* Too many total filter instructions. */ + for (i = 0; i < MAX_INSNS_PER_PATH; i++) { + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + if (ret != 0) + break; + } + ASSERT_NE(0, ret) { + TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", + i, count, i * (count + 4)); + } +} + +TEST(mode_filter_cannot_move_to_strict) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + + +TEST(mode_filter_get_seccomp) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + EXPECT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + EXPECT_EQ(2, ret); +} + + +TEST(ALLOW_all) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); +} + +TEST(empty_prog) +{ + struct sock_filter filter[] = { + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + +TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, 0x10000000U), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + EXPECT_EQ(0, syscall(__NR_getpid)) { + TH_LOG("getpid() shouldn't ever return"); + } +} + +/* return code >= 0x80000000 is unused. */ +TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, 0x90000000U), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + EXPECT_EQ(0, syscall(__NR_getpid)) { + TH_LOG("getpid() shouldn't ever return"); + } +} + +TEST_SIGNAL(KILL_all, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); +} + +TEST_SIGNAL(KILL_one, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_SIGNAL(KILL_one_arg_one, SIGSYS) +{ + void *fatal_address; + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + /* Only both with lower 32-bit for now. */ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, + (unsigned long)&fatal_address, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + struct tms timebuf; + clock_t clock = times(&timebuf); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_LE(clock, syscall(__NR_times, &timebuf)); + /* times() should never return. */ + EXPECT_EQ(0, syscall(__NR_times, &fatal_address)); +} + +TEST_SIGNAL(KILL_one_arg_six, SIGSYS) +{ +#ifndef __NR_mmap2 + int sysno = __NR_mmap; +#else + int sysno = __NR_mmap2; +#endif + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + /* Only both with lower 32-bit for now. */ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + int fd; + void *map1, *map2; + int page_size = sysconf(_SC_PAGESIZE); + + ASSERT_LT(0, page_size); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + fd = open("/dev/zero", O_RDONLY); + ASSERT_NE(-1, fd); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + map1 = (void *)syscall(sysno, + NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size); + EXPECT_NE(MAP_FAILED, map1); + /* mmap2() should never return. */ + map2 = (void *)syscall(sysno, + NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE); + EXPECT_EQ(MAP_FAILED, map2); + + /* The test failed, so clean up the resources. */ + munmap(map1, page_size); + munmap(map2, page_size); + close(fd); +} + +/* TODO(wad) add 64-bit versus 32-bit arg tests. */ +TEST(arg_out_of_range) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + +TEST(ERRNO_valid) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(E2BIG, errno); +} + +TEST(ERRNO_zero) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* "errno" of 0 is ok. */ + EXPECT_EQ(0, read(0, NULL, 0)); +} + +TEST(ERRNO_capped) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(4095, errno); +} + +FIXTURE_DATA(TRAP) { + struct sock_fprog prog; +}; + +FIXTURE_SETUP(TRAP) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); +} + +FIXTURE_TEARDOWN(TRAP) +{ + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F_SIGNAL(TRAP, dfl, SIGSYS) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + syscall(__NR_getpid); +} + +/* Ensure that SIGSYS overrides SIG_IGN */ +TEST_F_SIGNAL(TRAP, ign, SIGSYS) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + signal(SIGSYS, SIG_IGN); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + syscall(__NR_getpid); +} + +static struct siginfo TRAP_info; +static volatile int TRAP_nr; +static void TRAP_action(int nr, siginfo_t *info, void *void_context) +{ + memcpy(&TRAP_info, info, sizeof(TRAP_info)); + TRAP_nr = nr; +} + +TEST_F(TRAP, handler) +{ + int ret, test; + struct sigaction act; + sigset_t mask; + + memset(&act, 0, sizeof(act)); + sigemptyset(&mask); + sigaddset(&mask, SIGSYS); + + act.sa_sigaction = &TRAP_action; + act.sa_flags = SA_SIGINFO; + ret = sigaction(SIGSYS, &act, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("sigaction failed"); + } + ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("sigprocmask failed"); + } + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + TRAP_nr = 0; + memset(&TRAP_info, 0, sizeof(TRAP_info)); + /* Expect the registers to be rolled back. (nr = error) may vary + * based on arch. */ + ret = syscall(__NR_getpid); + /* Silence gcc warning about volatile. */ + test = TRAP_nr; + EXPECT_EQ(SIGSYS, test); + struct local_sigsys { + void *_call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } *sigsys = (struct local_sigsys *) +#ifdef si_syscall + &(TRAP_info.si_call_addr); +#else + &TRAP_info.si_pid; +#endif + EXPECT_EQ(__NR_getpid, sigsys->_syscall); + /* Make sure arch is non-zero. */ + EXPECT_NE(0, sigsys->_arch); + EXPECT_NE(0, (unsigned long)sigsys->_call_addr); +} + +FIXTURE_DATA(precedence) { + struct sock_fprog allow; + struct sock_fprog trace; + struct sock_fprog error; + struct sock_fprog trap; + struct sock_fprog kill; +}; + +FIXTURE_SETUP(precedence) +{ + struct sock_filter allow_insns[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter trace_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), + }; + struct sock_filter error_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), + }; + struct sock_filter trap_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), + }; + struct sock_filter kill_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + }; + + memset(self, 0, sizeof(*self)); +#define FILTER_ALLOC(_x) \ + self->_x.filter = malloc(sizeof(_x##_insns)); \ + ASSERT_NE(NULL, self->_x.filter); \ + memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ + self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) + FILTER_ALLOC(allow); + FILTER_ALLOC(trace); + FILTER_ALLOC(error); + FILTER_ALLOC(trap); + FILTER_ALLOC(kill); +} + +FIXTURE_TEARDOWN(precedence) +{ +#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) + FILTER_FREE(allow); + FILTER_FREE(trace); + FILTER_FREE(error); + FILTER_FREE(trap); + FILTER_FREE(kill); +} + +TEST_F(precedence, allow_ok) +{ + pid_t parent, res = 0; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + res = syscall(__NR_getppid); + EXPECT_EQ(parent, res); +} + +TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) +{ + pid_t parent, res = 0; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + res = syscall(__NR_getppid); + EXPECT_EQ(parent, res); + /* getpid() should never return. */ + res = syscall(__NR_getpid); + EXPECT_EQ(0, res); +} + +TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, errno_is_third) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, errno_is_third_in_any_order) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, trace_is_fourth) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* No ptracer */ + EXPECT_EQ(-1, syscall(__NR_getpid)); +} + +TEST_F(precedence, trace_is_fourth_in_any_order) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* No ptracer */ + EXPECT_EQ(-1, syscall(__NR_getpid)); +} + +#ifndef PTRACE_O_TRACESECCOMP +#define PTRACE_O_TRACESECCOMP 0x00000080 +#endif + +/* Catch the Ubuntu 12.04 value error. */ +#if PTRACE_EVENT_SECCOMP != 7 +#undef PTRACE_EVENT_SECCOMP +#endif + +#ifndef PTRACE_EVENT_SECCOMP +#define PTRACE_EVENT_SECCOMP 7 +#endif + +#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP) +bool tracer_running; +void tracer_stop(int sig) +{ + tracer_running = false; +} + +typedef void tracer_func_t(struct __test_metadata *_metadata, + pid_t tracee, int status, void *args); + +void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, + tracer_func_t tracer_func, void *args) +{ + int ret = -1; + struct sigaction action = { + .sa_handler = tracer_stop, + }; + + /* Allow external shutdown. */ + tracer_running = true; + ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); + + errno = 0; + while (ret == -1 && errno != EINVAL) + ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); + ASSERT_EQ(0, ret) { + kill(tracee, SIGKILL); + } + /* Wait for attach stop */ + wait(NULL); + + ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP); + ASSERT_EQ(0, ret) { + TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); + kill(tracee, SIGKILL); + } + ptrace(PTRACE_CONT, tracee, NULL, 0); + + /* Unblock the tracee */ + ASSERT_EQ(1, write(fd, "A", 1)); + ASSERT_EQ(0, close(fd)); + + /* Run until we're shut down. Must assert to stop execution. */ + while (tracer_running) { + int status; + + if (wait(&status) != tracee) + continue; + if (WIFSIGNALED(status) || WIFEXITED(status)) + /* Child is dead. Time to go. */ + return; + + /* Make sure this is a seccomp event. */ + ASSERT_EQ(true, IS_SECCOMP_EVENT(status)); + + tracer_func(_metadata, tracee, status, args); + + ret = ptrace(PTRACE_CONT, tracee, NULL, NULL); + ASSERT_EQ(0, ret); + } + /* Directly report the status of our test harness results. */ + syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* Common tracer setup/teardown functions. */ +void cont_handler(int num) +{ } +pid_t setup_trace_fixture(struct __test_metadata *_metadata, + tracer_func_t func, void *args) +{ + char sync; + int pipefd[2]; + pid_t tracer_pid; + pid_t tracee = getpid(); + + /* Setup a pipe for clean synchronization. */ + ASSERT_EQ(0, pipe(pipefd)); + + /* Fork a child which we'll promote to tracer */ + tracer_pid = fork(); + ASSERT_LE(0, tracer_pid); + signal(SIGALRM, cont_handler); + if (tracer_pid == 0) { + close(pipefd[0]); + tracer(_metadata, pipefd[1], tracee, func, args); + syscall(__NR_exit, 0); + } + close(pipefd[1]); + prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); + read(pipefd[0], &sync, 1); + close(pipefd[0]); + + return tracer_pid; +} +void teardown_trace_fixture(struct __test_metadata *_metadata, + pid_t tracer) +{ + if (tracer) { + int status; + /* + * Extract the exit code from the other process and + * adopt it for ourselves in case its asserts failed. + */ + ASSERT_EQ(0, kill(tracer, SIGUSR1)); + ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); + if (WEXITSTATUS(status)) + _metadata->passed = 0; + } +} + +/* "poke" tracer arguments and function. */ +struct tracer_args_poke_t { + unsigned long poke_addr; +}; + +void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, + void *args) +{ + int ret; + unsigned long msg; + struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; + + ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); + EXPECT_EQ(0, ret); + /* If this fails, don't try to recover. */ + ASSERT_EQ(0x1001, msg) { + kill(tracee, SIGKILL); + } + /* + * Poke in the message. + * Registers are not touched to try to keep this relatively arch + * agnostic. + */ + ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); + EXPECT_EQ(0, ret); +} + +FIXTURE_DATA(TRACE_poke) { + struct sock_fprog prog; + pid_t tracer; + long poked; + struct tracer_args_poke_t tracer_args; +}; + +FIXTURE_SETUP(TRACE_poke) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + self->poked = 0; + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); + + /* Set up tracer args. */ + self->tracer_args.poke_addr = (unsigned long)&self->poked; + + /* Launch tracer. */ + self->tracer = setup_trace_fixture(_metadata, tracer_poke, + &self->tracer_args); +} + +FIXTURE_TEARDOWN(TRACE_poke) +{ + teardown_trace_fixture(_metadata, self->tracer); + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F(TRACE_poke, read_has_side_effects) +{ + ssize_t ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + EXPECT_EQ(0, self->poked); + ret = read(-1, NULL, 0); + EXPECT_EQ(-1, ret); + EXPECT_EQ(0x1001, self->poked); +} + +TEST_F(TRACE_poke, getpid_runs_normally) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + EXPECT_EQ(0, self->poked); + EXPECT_NE(0, syscall(__NR_getpid)); + EXPECT_EQ(0, self->poked); +} + +#if defined(__x86_64__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM orig_rax +# define SYSCALL_RET rax +#elif defined(__i386__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM orig_eax +# define SYSCALL_RET eax +#elif defined(__arm__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM ARM_r7 +# define SYSCALL_RET ARM_r0 +#elif defined(__aarch64__) +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM regs[8] +# define SYSCALL_RET regs[0] +#elif defined(__powerpc__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM gpr[0] +# define SYSCALL_RET gpr[3] +#elif defined(__s390__) +# define ARCH_REGS s390_regs +# define SYSCALL_NUM gprs[2] +# define SYSCALL_RET gprs[2] +#else +# error "Do not know how to find your architecture's registers and syscalls" +#endif + +/* Architecture-specific syscall fetching routine. */ +int get_syscall(struct __test_metadata *_metadata, pid_t tracee) +{ + struct iovec iov; + ARCH_REGS regs; + + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { + TH_LOG("PTRACE_GETREGSET failed"); + return -1; + } + + return regs.SYSCALL_NUM; +} + +/* Architecture-specific syscall changing routine. */ +void change_syscall(struct __test_metadata *_metadata, + pid_t tracee, int syscall) +{ + struct iovec iov; + int ret; + ARCH_REGS regs; + + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); + EXPECT_EQ(0, ret); + +#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ + defined(__s390__) + { + regs.SYSCALL_NUM = syscall; + } + +#elif defined(__arm__) +# ifndef PTRACE_SET_SYSCALL +# define PTRACE_SET_SYSCALL 23 +# endif + { + ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); + EXPECT_EQ(0, ret); + } + +#elif defined(__aarch64__) +# ifndef NT_ARM_SYSTEM_CALL +# define NT_ARM_SYSTEM_CALL 0x404 +# endif + { + iov.iov_base = &syscall; + iov.iov_len = sizeof(syscall); + ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL, + &iov); + EXPECT_EQ(0, ret); + } + +#else + ASSERT_EQ(1, 0) { + TH_LOG("How is the syscall changed on this architecture?"); + } +#endif + + /* If syscall is skipped, change return value. */ + if (syscall == -1) + regs.SYSCALL_RET = 1; + + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); + EXPECT_EQ(0, ret); +} + +void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, + int status, void *args) +{ + int ret; + unsigned long msg; + + /* Make sure we got the right message. */ + ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); + EXPECT_EQ(0, ret); + + /* Validate and take action on expected syscalls. */ + switch (msg) { + case 0x1002: + /* change getpid to getppid. */ + EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); + change_syscall(_metadata, tracee, __NR_getppid); + break; + case 0x1003: + /* skip gettid. */ + EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); + change_syscall(_metadata, tracee, -1); + break; + case 0x1004: + /* do nothing (allow getppid) */ + EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); + break; + default: + EXPECT_EQ(0, msg) { + TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); + kill(tracee, SIGKILL); + } + } + +} + +FIXTURE_DATA(TRACE_syscall) { + struct sock_fprog prog; + pid_t tracer, mytid, mypid, parent; +}; + +FIXTURE_SETUP(TRACE_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); + + /* Prepare some testable syscall results. */ + self->mytid = syscall(__NR_gettid); + ASSERT_GT(self->mytid, 0); + ASSERT_NE(self->mytid, 1) { + TH_LOG("Running this test as init is not supported. :)"); + } + + self->mypid = getpid(); + ASSERT_GT(self->mypid, 0); + ASSERT_EQ(self->mytid, self->mypid); + + self->parent = getppid(); + ASSERT_GT(self->parent, 0); + ASSERT_NE(self->parent, self->mypid); + + /* Launch tracer. */ + self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL); +} + +FIXTURE_TEARDOWN(TRACE_syscall) +{ + teardown_trace_fixture(_metadata, self->tracer); + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F(TRACE_syscall, syscall_allowed) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* getppid works as expected (no changes). */ + EXPECT_EQ(self->parent, syscall(__NR_getppid)); + EXPECT_NE(self->mypid, syscall(__NR_getppid)); +} + +TEST_F(TRACE_syscall, syscall_redirected) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* getpid has been redirected to getppid as expected. */ + EXPECT_EQ(self->parent, syscall(__NR_getpid)); + EXPECT_NE(self->mypid, syscall(__NR_getpid)); +} + +TEST_F(TRACE_syscall, syscall_dropped) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* gettid has been skipped and an altered return value stored. */ + EXPECT_EQ(1, syscall(__NR_gettid)); + EXPECT_NE(self->mytid, syscall(__NR_gettid)); +} + +#ifndef __NR_seccomp +# if defined(__i386__) +# define __NR_seccomp 354 +# elif defined(__x86_64__) +# define __NR_seccomp 317 +# elif defined(__arm__) +# define __NR_seccomp 383 +# elif defined(__aarch64__) +# define __NR_seccomp 277 +# elif defined(__powerpc__) +# define __NR_seccomp 358 +# elif defined(__s390__) +# define __NR_seccomp 348 +# else +# warning "seccomp syscall number unknown for this architecture" +# define __NR_seccomp 0xffff +# endif +#endif + +#ifndef SECCOMP_SET_MODE_STRICT +#define SECCOMP_SET_MODE_STRICT 0 +#endif + +#ifndef SECCOMP_SET_MODE_FILTER +#define SECCOMP_SET_MODE_FILTER 1 +#endif + +#ifndef SECCOMP_FLAG_FILTER_TSYNC +#define SECCOMP_FLAG_FILTER_TSYNC 1 +#endif + +#ifndef seccomp +int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter) +{ + errno = 0; + return syscall(__NR_seccomp, op, flags, filter); +} +#endif + +TEST(seccomp_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Reject insane operation. */ + ret = seccomp(-1, 0, &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject crazy op value!"); + } + + /* Reject strict with flags or pointer. */ + ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject mode strict with flags!"); + } + ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject mode strict with uargs!"); + } + + /* Reject insane args for filter. */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject crazy filter flags!"); + } + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Did not reject NULL filter!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + EXPECT_EQ(0, errno) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", + strerror(errno)); + } +} + +TEST(seccomp_syscall_mode_lock) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(0, ret) { + TH_LOG("Could not install filter!"); + } + + /* Make sure neither entry point will switch to strict. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Switched to mode strict!"); + } + + ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Switched to mode strict!"); + } +} + +TEST(TSYNC_first) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(0, ret) { + TH_LOG("Could not install initial filter with TSYNC!"); + } +} + +#define TSYNC_SIBLINGS 2 +struct tsync_sibling { + pthread_t tid; + pid_t system_tid; + sem_t *started; + pthread_cond_t *cond; + pthread_mutex_t *mutex; + int diverge; + int num_waits; + struct sock_fprog *prog; + struct __test_metadata *metadata; +}; + +FIXTURE_DATA(TSYNC) { + struct sock_fprog root_prog, apply_prog; + struct tsync_sibling sibling[TSYNC_SIBLINGS]; + sem_t started; + pthread_cond_t cond; + pthread_mutex_t mutex; + int sibling_count; +}; + +FIXTURE_SETUP(TSYNC) +{ + struct sock_filter root_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter apply_filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->root_prog, 0, sizeof(self->root_prog)); + memset(&self->apply_prog, 0, sizeof(self->apply_prog)); + memset(&self->sibling, 0, sizeof(self->sibling)); + self->root_prog.filter = malloc(sizeof(root_filter)); + ASSERT_NE(NULL, self->root_prog.filter); + memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); + self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); + + self->apply_prog.filter = malloc(sizeof(apply_filter)); + ASSERT_NE(NULL, self->apply_prog.filter); + memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); + self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); + + self->sibling_count = 0; + pthread_mutex_init(&self->mutex, NULL); + pthread_cond_init(&self->cond, NULL); + sem_init(&self->started, 0, 0); + self->sibling[0].tid = 0; + self->sibling[0].cond = &self->cond; + self->sibling[0].started = &self->started; + self->sibling[0].mutex = &self->mutex; + self->sibling[0].diverge = 0; + self->sibling[0].num_waits = 1; + self->sibling[0].prog = &self->root_prog; + self->sibling[0].metadata = _metadata; + self->sibling[1].tid = 0; + self->sibling[1].cond = &self->cond; + self->sibling[1].started = &self->started; + self->sibling[1].mutex = &self->mutex; + self->sibling[1].diverge = 0; + self->sibling[1].prog = &self->root_prog; + self->sibling[1].num_waits = 1; + self->sibling[1].metadata = _metadata; +} + +FIXTURE_TEARDOWN(TSYNC) +{ + int sib = 0; + + if (self->root_prog.filter) + free(self->root_prog.filter); + if (self->apply_prog.filter) + free(self->apply_prog.filter); + + for ( ; sib < self->sibling_count; ++sib) { + struct tsync_sibling *s = &self->sibling[sib]; + void *status; + + if (!s->tid) + continue; + if (pthread_kill(s->tid, 0)) { + pthread_cancel(s->tid); + pthread_join(s->tid, &status); + } + } + pthread_mutex_destroy(&self->mutex); + pthread_cond_destroy(&self->cond); + sem_destroy(&self->started); +} + +void *tsync_sibling(void *data) +{ + long ret = 0; + struct tsync_sibling *me = data; + + me->system_tid = syscall(__NR_gettid); + + pthread_mutex_lock(me->mutex); + if (me->diverge) { + /* Just re-apply the root prog to fork the tree */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, + me->prog, 0, 0); + } + sem_post(me->started); + /* Return outside of started so parent notices failures. */ + if (ret) { + pthread_mutex_unlock(me->mutex); + return (void *)SIBLING_EXIT_FAILURE; + } + do { + pthread_cond_wait(me->cond, me->mutex); + me->num_waits = me->num_waits - 1; + } while (me->num_waits); + pthread_mutex_unlock(me->mutex); + + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + if (!ret) + return (void *)SIBLING_EXIT_NEWPRIVS; + read(0, NULL, 0); + return (void *)SIBLING_EXIT_UNKILLED; +} + +void tsync_start_sibling(struct tsync_sibling *sibling) +{ + pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); +} + +TEST_F(TSYNC, siblings_fail_prctl) +{ + long ret; + void *status; + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check prctl failure detection by requesting sib 0 diverge. */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("setting filter failed"); + } + + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + /* Signal the threads to clean up*/ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure diverging sibling failed to call prctl. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_ancestor) +{ + long ret; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Could install filter on all threads!"); + } + /* Tell the siblings to test the policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + /* Ensure they are both killed and don't exit cleanly. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(0x0, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(0x0, (long)status); +} + +TEST_F(TSYNC, two_sibling_want_nnp) +{ + void *status; + + /* start siblings before any prctl() operations */ + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + /* Tell the siblings to test no policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both upset about lacking nnp. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_no_filter) +{ + long ret; + void *status; + + /* start siblings before any prctl() operations */ + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("Could install filter on all threads!"); + } + + /* Tell the siblings to test the policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both killed and don't exit cleanly. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(0x0, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(0x0, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_one_divergence) +{ + long ret; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(self->sibling[0].system_tid, ret) { + TH_LOG("Did not fail on diverged sibling."); + } + + /* Wake the threads */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both unkilled. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); +} + +TEST_F(TSYNC, two_siblings_not_under_filter) +{ + long ret, sib; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* + * Sibling 0 will have its own seccomp policy + * and Sibling 1 will not be under seccomp at + * all. Sibling 1 will enter seccomp and 0 + * will cause failure. + */ + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(ret, self->sibling[0].system_tid) { + TH_LOG("Did not fail on diverged sibling."); + } + sib = 1; + if (ret == self->sibling[0].system_tid) + sib = 0; + + pthread_mutex_lock(&self->mutex); + + /* Increment the other siblings num_waits so we can clean up + * the one we just saw. + */ + self->sibling[!sib].num_waits += 1; + + /* Signal the thread to clean up*/ + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + pthread_join(self->sibling[sib].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); + /* Poll for actual task death. pthread_join doesn't guarantee it. */ + while (!kill(self->sibling[sib].system_tid, 0)) + sleep(0.1); + /* Switch to the remaining sibling */ + sib = !sib; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Expected the remaining sibling to sync"); + }; + + pthread_mutex_lock(&self->mutex); + + /* If remaining sibling didn't have a chance to wake up during + * the first broadcast, manually reduce the num_waits now. + */ + if (self->sibling[sib].num_waits > 1) + self->sibling[sib].num_waits = 1; + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + pthread_join(self->sibling[sib].tid, &status); + EXPECT_EQ(0, (long)status); + /* Poll for actual task death. pthread_join doesn't guarantee it. */ + while (!kill(self->sibling[sib].system_tid, 0)) + sleep(0.1); + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret); /* just us chickens */ +} + +/* Make sure restarted syscalls are seen directly as "restart_syscall". */ +TEST(syscall_restart) +{ + long ret; + unsigned long msg; + pid_t child_pid; + int pipefd[2]; + int status; + siginfo_t info = { }; + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + +#ifdef __NR_sigreturn + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0), +#endif + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), + + /* Allow __NR_write for easy logging. */ + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + /* The nanosleep jump target. */ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), + /* The restart_syscall jump target. */ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; +#if defined(__arm__) + struct utsname utsbuf; +#endif + + ASSERT_EQ(0, pipe(pipefd)); + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + /* Child uses EXPECT not ASSERT to deliver status correctly. */ + char buf = ' '; + struct timespec timeout = { }; + + /* Attach parent as tracer and stop. */ + EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); + EXPECT_EQ(0, raise(SIGSTOP)); + + EXPECT_EQ(0, close(pipefd[1])); + + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + EXPECT_EQ(0, ret) { + TH_LOG("Failed to install filter!"); + } + + EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { + TH_LOG("Failed to read() sync from parent"); + } + EXPECT_EQ('.', buf) { + TH_LOG("Failed to get sync data from read()"); + } + + /* Start nanosleep to be interrupted. */ + timeout.tv_sec = 1; + errno = 0; + EXPECT_EQ(0, nanosleep(&timeout, NULL)) { + TH_LOG("Call to nanosleep() failed (errno %d)", errno); + } + + /* Read final sync from parent. */ + EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { + TH_LOG("Failed final read() from parent"); + } + EXPECT_EQ('!', buf) { + TH_LOG("Failed to get final data from read()"); + } + + /* Directly report the status of our test harness results. */ + syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS + : EXIT_FAILURE); + } + EXPECT_EQ(0, close(pipefd[0])); + + /* Attach to child, setup options, and release. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, + PTRACE_O_TRACESECCOMP)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(1, write(pipefd[1], ".", 1)); + + /* Wait for nanosleep() to start. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); + ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); + ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); + ASSERT_EQ(0x100, msg); + EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid)); + + /* Might as well check siginfo for sanity while we're here. */ + ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); + ASSERT_EQ(SIGTRAP, info.si_signo); + ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); + EXPECT_EQ(0, info.si_errno); + EXPECT_EQ(getuid(), info.si_uid); + /* Verify signal delivery came from child (seccomp-triggered). */ + EXPECT_EQ(child_pid, info.si_pid); + + /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */ + ASSERT_EQ(0, kill(child_pid, SIGSTOP)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); + /* Verify signal delivery came from parent now. */ + ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); + EXPECT_EQ(getpid(), info.si_pid); + + /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ + ASSERT_EQ(0, kill(child_pid, SIGCONT)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGCONT, WSTOPSIG(status)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + + /* Wait for restart_syscall() to start. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); + ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); + ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); + + ASSERT_EQ(0x200, msg); + ret = get_syscall(_metadata, child_pid); +#if defined(__arm__) + /* + * FIXME: + * - native ARM registers do NOT expose true syscall. + * - compat ARM registers on ARM64 DO expose true syscall. + */ + ASSERT_EQ(0, uname(&utsbuf)); + if (strncmp(utsbuf.machine, "arm", 3) == 0) { + EXPECT_EQ(__NR_nanosleep, ret); + } else +#endif + { + EXPECT_EQ(__NR_restart_syscall, ret); + } + + /* Write again to end test. */ + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(1, write(pipefd[1], "!", 1)); + EXPECT_EQ(0, close(pipefd[1])); + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + if (WIFSIGNALED(status) || WEXITSTATUS(status)) + _metadata->passed = 0; +} + +/* + * TODO: + * - add microbenchmarks + * - expand NNP testing + * - better arch-specific TRACE and TRAP handlers. + * - endianness checking when appropriate + * - 64-bit arg prodding + * - arch value testing (x86 modes especially) + * - ... + */ + +TEST_HARNESS_MAIN diff --git a/kernel/tools/testing/selftests/seccomp/test_harness.h b/kernel/tools/testing/selftests/seccomp/test_harness.h new file mode 100644 index 000000000..fb2841601 --- /dev/null +++ b/kernel/tools/testing/selftests/seccomp/test_harness.h @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by the GPLv2 license. + * + * test_harness.h: simple C unit test helper. + * + * Usage: + * #include "test_harness.h" + * TEST(standalone_test) { + * do_some_stuff; + * EXPECT_GT(10, stuff) { + * stuff_state_t state; + * enumerate_stuff_state(&state); + * TH_LOG("expectation failed with state: %s", state.msg); + * } + * more_stuff; + * ASSERT_NE(some_stuff, NULL) TH_LOG("how did it happen?!"); + * last_stuff; + * EXPECT_EQ(0, last_stuff); + * } + * + * FIXTURE(my_fixture) { + * mytype_t *data; + * int awesomeness_level; + * }; + * FIXTURE_SETUP(my_fixture) { + * self->data = mytype_new(); + * ASSERT_NE(NULL, self->data); + * } + * FIXTURE_TEARDOWN(my_fixture) { + * mytype_free(self->data); + * } + * TEST_F(my_fixture, data_is_good) { + * EXPECT_EQ(1, is_my_data_good(self->data)); + * } + * + * TEST_HARNESS_MAIN + * + * API inspired by code.google.com/p/googletest + */ +#ifndef TEST_HARNESS_H_ +#define TEST_HARNESS_H_ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +/* All exported functionality should be declared through this macro. */ +#define TEST_API(x) _##x + +/* + * Exported APIs + */ + +/* TEST(name) { implementation } + * Defines a test by name. + * Names must be unique and tests must not be run in parallel. The + * implementation containing block is a function and scoping should be treated + * as such. Returning early may be performed with a bare "return;" statement. + * + * EXPECT_* and ASSERT_* are valid in a TEST() { } context. + */ +#define TEST TEST_API(TEST) + +/* TEST_SIGNAL(name, signal) { implementation } + * Defines a test by name and the expected term signal. + * Names must be unique and tests must not be run in parallel. The + * implementation containing block is a function and scoping should be treated + * as such. Returning early may be performed with a bare "return;" statement. + * + * EXPECT_* and ASSERT_* are valid in a TEST() { } context. + */ +#define TEST_SIGNAL TEST_API(TEST_SIGNAL) + +/* FIXTURE(datatype name) { + * type property1; + * ... + * }; + * Defines the data provided to TEST_F()-defined tests as |self|. It should be + * populated and cleaned up using FIXTURE_SETUP and FIXTURE_TEARDOWN. + */ +#define FIXTURE TEST_API(FIXTURE) + +/* FIXTURE_DATA(datatype name) + * This call may be used when the type of the fixture data + * is needed. In general, this should not be needed unless + * the |self| is being passed to a helper directly. + */ +#define FIXTURE_DATA TEST_API(FIXTURE_DATA) + +/* FIXTURE_SETUP(fixture name) { implementation } + * Populates the required "setup" function for a fixture. An instance of the + * datatype defined with _FIXTURE_DATA will be exposed as |self| for the + * implementation. + * + * ASSERT_* are valid for use in this context and will prempt the execution + * of any dependent fixture tests. + * + * A bare "return;" statement may be used to return early. + */ +#define FIXTURE_SETUP TEST_API(FIXTURE_SETUP) + +/* FIXTURE_TEARDOWN(fixture name) { implementation } + * Populates the required "teardown" function for a fixture. An instance of the + * datatype defined with _FIXTURE_DATA will be exposed as |self| for the + * implementation to clean up. + * + * A bare "return;" statement may be used to return early. + */ +#define FIXTURE_TEARDOWN TEST_API(FIXTURE_TEARDOWN) + +/* TEST_F(fixture, name) { implementation } + * Defines a test that depends on a fixture (e.g., is part of a test case). + * Very similar to TEST() except that |self| is the setup instance of fixture's + * datatype exposed for use by the implementation. + */ +#define TEST_F TEST_API(TEST_F) + +#define TEST_F_SIGNAL TEST_API(TEST_F_SIGNAL) + +/* Use once to append a main() to the test file. E.g., + * TEST_HARNESS_MAIN + */ +#define TEST_HARNESS_MAIN TEST_API(TEST_HARNESS_MAIN) + +/* + * Operators for use in TEST and TEST_F. + * ASSERT_* calls will stop test execution immediately. + * EXPECT_* calls will emit a failure warning, note it, and continue. + */ + +/* ASSERT_EQ(expected, measured): expected == measured */ +#define ASSERT_EQ TEST_API(ASSERT_EQ) +/* ASSERT_NE(expected, measured): expected != measured */ +#define ASSERT_NE TEST_API(ASSERT_NE) +/* ASSERT_LT(expected, measured): expected < measured */ +#define ASSERT_LT TEST_API(ASSERT_LT) +/* ASSERT_LE(expected, measured): expected <= measured */ +#define ASSERT_LE TEST_API(ASSERT_LE) +/* ASSERT_GT(expected, measured): expected > measured */ +#define ASSERT_GT TEST_API(ASSERT_GT) +/* ASSERT_GE(expected, measured): expected >= measured */ +#define ASSERT_GE TEST_API(ASSERT_GE) +/* ASSERT_NULL(measured): NULL == measured */ +#define ASSERT_NULL TEST_API(ASSERT_NULL) +/* ASSERT_TRUE(measured): measured != 0 */ +#define ASSERT_TRUE TEST_API(ASSERT_TRUE) +/* ASSERT_FALSE(measured): measured == 0 */ +#define ASSERT_FALSE TEST_API(ASSERT_FALSE) +/* ASSERT_STREQ(expected, measured): !strcmp(expected, measured) */ +#define ASSERT_STREQ TEST_API(ASSERT_STREQ) +/* ASSERT_STRNE(expected, measured): strcmp(expected, measured) */ +#define ASSERT_STRNE TEST_API(ASSERT_STRNE) +/* EXPECT_EQ(expected, measured): expected == measured */ +#define EXPECT_EQ TEST_API(EXPECT_EQ) +/* EXPECT_NE(expected, measured): expected != measured */ +#define EXPECT_NE TEST_API(EXPECT_NE) +/* EXPECT_LT(expected, measured): expected < measured */ +#define EXPECT_LT TEST_API(EXPECT_LT) +/* EXPECT_LE(expected, measured): expected <= measured */ +#define EXPECT_LE TEST_API(EXPECT_LE) +/* EXPECT_GT(expected, measured): expected > measured */ +#define EXPECT_GT TEST_API(EXPECT_GT) +/* EXPECT_GE(expected, measured): expected >= measured */ +#define EXPECT_GE TEST_API(EXPECT_GE) +/* EXPECT_NULL(measured): NULL == measured */ +#define EXPECT_NULL TEST_API(EXPECT_NULL) +/* EXPECT_TRUE(measured): 0 != measured */ +#define EXPECT_TRUE TEST_API(EXPECT_TRUE) +/* EXPECT_FALSE(measured): 0 == measured */ +#define EXPECT_FALSE TEST_API(EXPECT_FALSE) +/* EXPECT_STREQ(expected, measured): !strcmp(expected, measured) */ +#define EXPECT_STREQ TEST_API(EXPECT_STREQ) +/* EXPECT_STRNE(expected, measured): strcmp(expected, measured) */ +#define EXPECT_STRNE TEST_API(EXPECT_STRNE) + +/* TH_LOG(format, ...) + * Optional debug logging function available for use in tests. + * Logging may be enabled or disabled by defining TH_LOG_ENABLED. + * E.g., #define TH_LOG_ENABLED 1 + * If no definition is provided, logging is enabled by default. + */ +#define TH_LOG TEST_API(TH_LOG) + +/* + * Internal implementation. + * + */ + +/* Utilities exposed to the test definitions */ +#ifndef TH_LOG_STREAM +# define TH_LOG_STREAM stderr +#endif + +#ifndef TH_LOG_ENABLED +# define TH_LOG_ENABLED 1 +#endif + +#define _TH_LOG(fmt, ...) do { \ + if (TH_LOG_ENABLED) \ + __TH_LOG(fmt, ##__VA_ARGS__); \ +} while (0) + +/* Unconditional logger for internal use. */ +#define __TH_LOG(fmt, ...) \ + fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \ + __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__) + +/* Defines the test function and creates the registration stub. */ +#define _TEST(test_name) __TEST_IMPL(test_name, -1) + +#define _TEST_SIGNAL(test_name, signal) __TEST_IMPL(test_name, signal) + +#define __TEST_IMPL(test_name, _signal) \ + static void test_name(struct __test_metadata *_metadata); \ + static struct __test_metadata _##test_name##_object = \ + { name: "global." #test_name, \ + fn: &test_name, termsig: _signal }; \ + static void __attribute__((constructor)) _register_##test_name(void) \ + { \ + __register_test(&_##test_name##_object); \ + } \ + static void test_name( \ + struct __test_metadata __attribute__((unused)) *_metadata) + +/* Wraps the struct name so we have one less argument to pass around. */ +#define _FIXTURE_DATA(fixture_name) struct _test_data_##fixture_name + +/* Called once per fixture to setup the data and register. */ +#define _FIXTURE(fixture_name) \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_data(void) \ + { \ + __fixture_count++; \ + } \ + _FIXTURE_DATA(fixture_name) + +/* Prepares the setup function for the fixture. |_metadata| is included + * so that ASSERT_* work as a convenience. + */ +#define _FIXTURE_SETUP(fixture_name) \ + void fixture_name##_setup( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) +#define _FIXTURE_TEARDOWN(fixture_name) \ + void fixture_name##_teardown( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + +/* Emits test registration and helpers for fixture-based test + * cases. + * TODO(wad) register fixtures on dedicated test lists. + */ +#define _TEST_F(fixture_name, test_name) \ + __TEST_F_IMPL(fixture_name, test_name, -1) + +#define _TEST_F_SIGNAL(fixture_name, test_name, signal) \ + __TEST_F_IMPL(fixture_name, test_name, signal) + +#define __TEST_F_IMPL(fixture_name, test_name, signal) \ + static void fixture_name##_##test_name( \ + struct __test_metadata *_metadata, \ + _FIXTURE_DATA(fixture_name) *self); \ + static inline void wrapper_##fixture_name##_##test_name( \ + struct __test_metadata *_metadata) \ + { \ + /* fixture data is alloced, setup, and torn down per call. */ \ + _FIXTURE_DATA(fixture_name) self; \ + memset(&self, 0, sizeof(_FIXTURE_DATA(fixture_name))); \ + fixture_name##_setup(_metadata, &self); \ + /* Let setup failure terminate early. */ \ + if (!_metadata->passed) \ + return; \ + fixture_name##_##test_name(_metadata, &self); \ + fixture_name##_teardown(_metadata, &self); \ + } \ + static struct __test_metadata \ + _##fixture_name##_##test_name##_object = { \ + name: #fixture_name "." #test_name, \ + fn: &wrapper_##fixture_name##_##test_name, \ + termsig: signal, \ + }; \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_##test_name(void) \ + { \ + __register_test(&_##fixture_name##_##test_name##_object); \ + } \ + static void fixture_name##_##test_name( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + +/* Exports a simple wrapper to run the test harness. */ +#define _TEST_HARNESS_MAIN \ + static void __attribute__((constructor)) \ + __constructor_order_last(void) \ + { \ + if (!__constructor_order) \ + __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \ + } \ + int main(int argc, char **argv) { \ + return test_harness_run(argc, argv); \ + } + +#define _ASSERT_EQ(_expected, _seen) \ + __EXPECT(_expected, _seen, ==, 1) +#define _ASSERT_NE(_expected, _seen) \ + __EXPECT(_expected, _seen, !=, 1) +#define _ASSERT_LT(_expected, _seen) \ + __EXPECT(_expected, _seen, <, 1) +#define _ASSERT_LE(_expected, _seen) \ + __EXPECT(_expected, _seen, <=, 1) +#define _ASSERT_GT(_expected, _seen) \ + __EXPECT(_expected, _seen, >, 1) +#define _ASSERT_GE(_expected, _seen) \ + __EXPECT(_expected, _seen, >=, 1) +#define _ASSERT_NULL(_seen) \ + __EXPECT(NULL, _seen, ==, 1) + +#define _ASSERT_TRUE(_seen) \ + _ASSERT_NE(0, _seen) +#define _ASSERT_FALSE(_seen) \ + _ASSERT_EQ(0, _seen) +#define _ASSERT_STREQ(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, ==, 1) +#define _ASSERT_STRNE(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, !=, 1) + +#define _EXPECT_EQ(_expected, _seen) \ + __EXPECT(_expected, _seen, ==, 0) +#define _EXPECT_NE(_expected, _seen) \ + __EXPECT(_expected, _seen, !=, 0) +#define _EXPECT_LT(_expected, _seen) \ + __EXPECT(_expected, _seen, <, 0) +#define _EXPECT_LE(_expected, _seen) \ + __EXPECT(_expected, _seen, <=, 0) +#define _EXPECT_GT(_expected, _seen) \ + __EXPECT(_expected, _seen, >, 0) +#define _EXPECT_GE(_expected, _seen) \ + __EXPECT(_expected, _seen, >=, 0) + +#define _EXPECT_NULL(_seen) \ + __EXPECT(NULL, _seen, ==, 0) +#define _EXPECT_TRUE(_seen) \ + _EXPECT_NE(0, _seen) +#define _EXPECT_FALSE(_seen) \ + _EXPECT_EQ(0, _seen) + +#define _EXPECT_STREQ(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, ==, 0) +#define _EXPECT_STRNE(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, !=, 0) + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +/* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is + * not thread-safe, but it should be fine in most sane test scenarios. + * + * Using __bail(), which optionally abort()s, is the easiest way to early + * return while still providing an optional block to the API consumer. + */ +#define OPTIONAL_HANDLER(_assert) \ + for (; _metadata->trigger; _metadata->trigger = __bail(_assert)) + +#define __EXPECT(_expected, _seen, _t, _assert) do { \ + /* Avoid multiple evaluation of the cases */ \ + __typeof__(_expected) __exp = (_expected); \ + __typeof__(_seen) __seen = (_seen); \ + if (!(__exp _t __seen)) { \ + unsigned long long __exp_print = (unsigned long long)__exp; \ + unsigned long long __seen_print = (unsigned long long)__seen; \ + __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ + #_expected, __exp_print, #_t, \ + #_seen, __seen_print); \ + _metadata->passed = 0; \ + /* Ensure the optional handler is triggered */ \ + _metadata->trigger = 1; \ + } \ +} while (0); OPTIONAL_HANDLER(_assert) + +#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \ + const char *__exp = (_expected); \ + const char *__seen = (_seen); \ + if (!(strcmp(__exp, __seen) _t 0)) { \ + __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \ + _metadata->passed = 0; \ + _metadata->trigger = 1; \ + } \ +} while (0); OPTIONAL_HANDLER(_assert) + +/* Contains all the information for test execution and status checking. */ +struct __test_metadata { + const char *name; + void (*fn)(struct __test_metadata *); + int termsig; + int passed; + int trigger; /* extra handler after the evaluation */ + struct __test_metadata *prev, *next; +}; + +/* Storage for the (global) tests to be run. */ +static struct __test_metadata *__test_list; +static unsigned int __test_count; +static unsigned int __fixture_count; +static int __constructor_order; + +#define _CONSTRUCTOR_ORDER_FORWARD 1 +#define _CONSTRUCTOR_ORDER_BACKWARD -1 + +/* + * Since constructors are called in reverse order, reverse the test + * list so tests are run in source declaration order. + * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html + * However, it seems not all toolchains do this correctly, so use + * __constructor_order to detect which direction is called first + * and adjust list building logic to get things running in the right + * direction. + */ +static inline void __register_test(struct __test_metadata *t) +{ + __test_count++; + /* Circular linked list where only prev is circular. */ + if (__test_list == NULL) { + __test_list = t; + t->next = NULL; + t->prev = t; + return; + } + if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { + t->next = NULL; + t->prev = __test_list->prev; + t->prev->next = t; + __test_list->prev = t; + } else { + t->next = __test_list; + t->next->prev = t; + t->prev = t; + __test_list = t; + } +} + +static inline int __bail(int for_realz) +{ + if (for_realz) + abort(); + return 0; +} + +void __run_test(struct __test_metadata *t) +{ + pid_t child_pid; + int status; + + t->passed = 1; + t->trigger = 0; + printf("[ RUN ] %s\n", t->name); + child_pid = fork(); + if (child_pid < 0) { + printf("ERROR SPAWNING TEST CHILD\n"); + t->passed = 0; + } else if (child_pid == 0) { + t->fn(t); + _exit(t->passed); + } else { + /* TODO(wad) add timeout support. */ + waitpid(child_pid, &status, 0); + if (WIFEXITED(status)) { + t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0; + if (t->termsig != -1) { + fprintf(TH_LOG_STREAM, + "%s: Test exited normally " + "instead of by signal (code: %d)\n", + t->name, + WEXITSTATUS(status)); + } + } else if (WIFSIGNALED(status)) { + t->passed = 0; + if (WTERMSIG(status) == SIGABRT) { + fprintf(TH_LOG_STREAM, + "%s: Test terminated by assertion\n", + t->name); + } else if (WTERMSIG(status) == t->termsig) { + t->passed = 1; + } else { + fprintf(TH_LOG_STREAM, + "%s: Test terminated unexpectedly " + "by signal %d\n", + t->name, + WTERMSIG(status)); + } + } else { + fprintf(TH_LOG_STREAM, + "%s: Test ended in some other way [%u]\n", + t->name, + status); + } + } + printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name); +} + +static int test_harness_run(int __attribute__((unused)) argc, + char __attribute__((unused)) **argv) +{ + struct __test_metadata *t; + int ret = 0; + unsigned int count = 0; + unsigned int pass_count = 0; + + /* TODO(wad) add optional arguments similar to gtest. */ + printf("[==========] Running %u tests from %u test cases.\n", + __test_count, __fixture_count + 1); + for (t = __test_list; t; t = t->next) { + count++; + __run_test(t); + if (t->passed) + pass_count++; + else + ret = 1; + } + printf("[==========] %u / %u tests passed.\n", pass_count, count); + printf("[ %s ]\n", (ret ? "FAILED" : "PASSED")); + return ret; +} + +static void __attribute__((constructor)) __constructor_order_first(void) +{ + if (!__constructor_order) + __constructor_order = _CONSTRUCTOR_ORDER_FORWARD; +} + +#endif /* TEST_HARNESS_H_ */ diff --git a/kernel/tools/testing/selftests/static_keys/Makefile b/kernel/tools/testing/selftests/static_keys/Makefile new file mode 100644 index 000000000..9cdadf37f --- /dev/null +++ b/kernel/tools/testing/selftests/static_keys/Makefile @@ -0,0 +1,8 @@ +# Makefile for static keys selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +TEST_PROGS := test_static_keys.sh + +include ../lib.mk diff --git a/kernel/tools/testing/selftests/static_keys/test_static_keys.sh b/kernel/tools/testing/selftests/static_keys/test_static_keys.sh new file mode 100755 index 000000000..1261e3fa1 --- /dev/null +++ b/kernel/tools/testing/selftests/static_keys/test_static_keys.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# Runs static keys kernel module tests + +if /sbin/modprobe -q test_static_key_base; then + if /sbin/modprobe -q test_static_keys; then + echo "static_key: ok" + /sbin/modprobe -q -r test_static_keys + /sbin/modprobe -q -r test_static_key_base + else + echo "static_keys: [FAIL]" + /sbin/modprobe -q -r test_static_key_base + fi +else + echo "static_key: [FAIL]" + exit 1 +fi diff --git a/kernel/tools/testing/selftests/timers/.gitignore b/kernel/tools/testing/selftests/timers/.gitignore new file mode 100644 index 000000000..ced998151 --- /dev/null +++ b/kernel/tools/testing/selftests/timers/.gitignore @@ -0,0 +1,18 @@ +alarmtimer-suspend +change_skew +clocksource-switch +inconsistency-check +leap-a-day +leapcrash +mqueue-lat +nanosleep +nsleep-lat +posix_timers +raw_skew +rtctest +set-2038 +set-tai +set-timer-lat +skew_consistency +threadtest +valid-adjtimex diff --git a/kernel/tools/testing/selftests/timers/Makefile b/kernel/tools/testing/selftests/timers/Makefile index 89a3f44bf..4a1be1b75 100644 --- a/kernel/tools/testing/selftests/timers/Makefile +++ b/kernel/tools/testing/selftests/timers/Makefile @@ -8,7 +8,7 @@ LDFLAGS += -lrt -lpthread TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ inconsistency-check raw_skew threadtest rtctest -TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex change_skew \ +TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \ skew_consistency clocksource-switch leap-a-day \ leapcrash set-tai set-2038 @@ -24,6 +24,7 @@ include ../lib.mk run_destructive_tests: run_tests ./alarmtimer-suspend ./valid-adjtimex + ./adjtick ./change_skew ./skew_consistency ./clocksource-switch diff --git a/kernel/tools/testing/selftests/timers/adjtick.c b/kernel/tools/testing/selftests/timers/adjtick.c new file mode 100644 index 000000000..9887fd538 --- /dev/null +++ b/kernel/tools/testing/selftests/timers/adjtick.c @@ -0,0 +1,221 @@ +/* adjtimex() tick adjustment test + * by: John Stultz <john.stultz@linaro.org> + * (C) Copyright Linaro Limited 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc adjtick.c -o adjtick -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> + +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CLOCK_MONOTONIC_RAW 4 + +#define NSEC_PER_SEC 1000000000LL +#define USEC_PER_SEC 1000000 + +#define MILLION 1000000 + +long systick; + +long long llabs(long long val) +{ + if (val < 0) + val = -val; + return val; +} + +unsigned long long ts_to_nsec(struct timespec ts) +{ + return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} + +struct timespec nsec_to_ts(long long ns) +{ + struct timespec ts; + + ts.tv_sec = ns/NSEC_PER_SEC; + ts.tv_nsec = ns%NSEC_PER_SEC; + + return ts; +} + +long long diff_timespec(struct timespec start, struct timespec end) +{ + long long start_ns, end_ns; + + start_ns = ts_to_nsec(start); + end_ns = ts_to_nsec(end); + + return end_ns - start_ns; +} + +void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw) +{ + struct timespec start, mid, end; + long long diff = 0, tmp; + int i; + + clock_gettime(CLOCK_MONOTONIC, mon); + clock_gettime(CLOCK_MONOTONIC_RAW, raw); + + /* Try to get a more tightly bound pairing */ + for (i = 0; i < 3; i++) { + long long newdiff; + + clock_gettime(CLOCK_MONOTONIC, &start); + clock_gettime(CLOCK_MONOTONIC_RAW, &mid); + clock_gettime(CLOCK_MONOTONIC, &end); + + newdiff = diff_timespec(start, end); + if (diff == 0 || newdiff < diff) { + diff = newdiff; + *raw = mid; + tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2; + *mon = nsec_to_ts(tmp); + } + } +} + +long long get_ppm_drift(void) +{ + struct timespec mon_start, raw_start, mon_end, raw_end; + long long delta1, delta2, eppm; + + get_monotonic_and_raw(&mon_start, &raw_start); + + sleep(15); + + get_monotonic_and_raw(&mon_end, &raw_end); + + delta1 = diff_timespec(mon_start, mon_end); + delta2 = diff_timespec(raw_start, raw_end); + + eppm = (delta1*MILLION)/delta2 - MILLION; + + return eppm; +} + +int check_tick_adj(long tickval) +{ + long long eppm, ppm; + struct timex tx1; + + tx1.modes = ADJ_TICK; + tx1.modes |= ADJ_OFFSET; + tx1.modes |= ADJ_FREQUENCY; + tx1.modes |= ADJ_STATUS; + + tx1.status = STA_PLL; + tx1.offset = 0; + tx1.freq = 0; + tx1.tick = tickval; + + adjtimex(&tx1); + + sleep(1); + + ppm = ((long long)tickval * MILLION)/systick - MILLION; + printf("Estimating tick (act: %ld usec, %lld ppm): ", tickval, ppm); + + eppm = get_ppm_drift(); + printf("%lld usec, %lld ppm", systick + (systick * eppm / MILLION), eppm); + + tx1.modes = 0; + adjtimex(&tx1); + + if (tx1.offset || tx1.freq || tx1.tick != tickval) { + printf(" [ERROR]\n"); + printf("\tUnexpected adjtimex return values, make sure ntpd is not running.\n"); + return -1; + } + + /* + * Here we use 100ppm difference as an error bound. + * We likely should see better, but some coarse clocksources + * cannot match the HZ tick size accurately, so we have a + * internal correction factor that doesn't scale exactly + * with the adjustment, resulting in > 10ppm error during + * a 10% adjustment. 100ppm also gives us more breathing + * room for interruptions during the measurement. + */ + if (llabs(eppm - ppm) > 100) { + printf(" [FAILED]\n"); + return -1; + } + printf(" [OK]\n"); + + return 0; +} + +int main(int argv, char **argc) +{ + struct timespec raw; + long tick, max, interval, err; + struct timex tx1; + + err = 0; + setbuf(stdout, NULL); + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) { + printf("ERR: NO CLOCK_MONOTONIC_RAW\n"); + return -1; + } + + printf("Each iteration takes about 15 seconds\n"); + + systick = sysconf(_SC_CLK_TCK); + systick = USEC_PER_SEC/sysconf(_SC_CLK_TCK); + max = systick/10; /* +/- 10% */ + interval = max/4; /* in 4 steps each side */ + + for (tick = (systick - max); tick < (systick + max); tick += interval) { + if (check_tick_adj(tick)) { + err = 1; + break; + } + } + + /* Reset things to zero */ + tx1.modes = ADJ_TICK; + tx1.modes |= ADJ_OFFSET; + tx1.modes |= ADJ_FREQUENCY; + + tx1.offset = 0; + tx1.freq = 0; + tx1.tick = systick; + + adjtimex(&tx1); + + if (err) + return ksft_exit_fail(); + + return ksft_exit_pass(); +} diff --git a/kernel/tools/testing/selftests/timers/alarmtimer-suspend.c b/kernel/tools/testing/selftests/timers/alarmtimer-suspend.c index aaffbde1d..72cacf538 100644 --- a/kernel/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/kernel/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -57,7 +57,7 @@ static inline int ksft_exit_fail(void) #define NSEC_PER_SEC 1000000000ULL -#define UNREASONABLE_LAT (NSEC_PER_SEC * 4) /* hopefully we resume in 4secs */ +#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */ #define SUSPEND_SECS 15 int alarmcount; @@ -152,7 +152,11 @@ int main(void) alarm_clock_id++) { alarmcount = 0; - timer_create(alarm_clock_id, &se, &tm1); + if (timer_create(alarm_clock_id, &se, &tm1) == -1) { + printf("timer_create failled, %s unspported?\n", + clockstring(alarm_clock_id)); + break; + } clock_gettime(alarm_clock_id, &start_time); printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id), @@ -172,7 +176,7 @@ int main(void) while (alarmcount < 10) { int ret; - sleep(1); + sleep(3); ret = system("echo mem > /sys/power/state"); if (ret) break; diff --git a/kernel/tools/testing/selftests/timers/leap-a-day.c b/kernel/tools/testing/selftests/timers/leap-a-day.c index b8272e6c4..fb46ad6ac 100644 --- a/kernel/tools/testing/selftests/timers/leap-a-day.c +++ b/kernel/tools/testing/selftests/timers/leap-a-day.c @@ -44,6 +44,7 @@ #include <time.h> #include <sys/time.h> #include <sys/timex.h> +#include <sys/errno.h> #include <string.h> #include <signal.h> #include <unistd.h> @@ -63,6 +64,9 @@ static inline int ksft_exit_fail(void) #define NSEC_PER_SEC 1000000000ULL #define CLOCK_TAI 11 +time_t next_leap; +int error_found; + /* returns 1 if a <= b, 0 otherwise */ static inline int in_order(struct timespec a, struct timespec b) { @@ -134,6 +138,35 @@ void handler(int unused) exit(0); } +void sigalarm(int signo) +{ + struct timex tx; + int ret; + + tx.modes = 0; + ret = adjtimex(&tx); + + if (tx.time.tv_sec < next_leap) { + printf("Error: Early timer expiration! (Should be %ld)\n", next_leap); + error_found = 1; + printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n", + tx.time.tv_sec, + tx.time.tv_usec, + tx.tai, + time_state_str(ret)); + } + if (ret != TIME_WAIT) { + printf("Error: Timer seeing incorrect NTP state? (Should be TIME_WAIT)\n"); + error_found = 1; + printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n", + tx.time.tv_sec, + tx.time.tv_usec, + tx.tai, + time_state_str(ret)); + } +} + + /* Test for known hrtimer failure */ void test_hrtimer_failure(void) { @@ -144,12 +177,19 @@ void test_hrtimer_failure(void) clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL); clock_gettime(CLOCK_REALTIME, &now); - if (!in_order(target, now)) + if (!in_order(target, now)) { printf("ERROR: hrtimer early expiration failure observed.\n"); + error_found = 1; + } } int main(int argc, char **argv) { + timer_t tm1; + struct itimerspec its1; + struct sigevent se; + struct sigaction act; + int signum = SIGRTMAX; int settime = 0; int tai_time = 0; int insert = 1; @@ -191,6 +231,12 @@ int main(int argc, char **argv) signal(SIGINT, handler); signal(SIGKILL, handler); + /* Set up timer signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + if (iterations < 0) printf("This runs continuously. Press ctrl-c to stop\n"); else @@ -201,7 +247,7 @@ int main(int argc, char **argv) int ret; struct timespec ts; struct timex tx; - time_t now, next_leap; + time_t now; /* Get the current time */ clock_gettime(CLOCK_REALTIME, &ts); @@ -251,10 +297,27 @@ int main(int argc, char **argv) printf("Scheduling leap second for %s", ctime(&next_leap)); + /* Set up timer */ + printf("Setting timer for %ld - %s", next_leap, ctime(&next_leap)); + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = signum; + se.sigev_value.sival_int = 0; + if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) { + printf("Error: timer_create failed\n"); + return ksft_exit_fail(); + } + its1.it_value.tv_sec = next_leap; + its1.it_value.tv_nsec = 0; + its1.it_interval.tv_sec = 0; + its1.it_interval.tv_nsec = 0; + timer_settime(tm1, TIMER_ABSTIME, &its1, NULL); + /* Wake up 3 seconds before leap */ ts.tv_sec = next_leap - 3; ts.tv_nsec = 0; + while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL)) printf("Something woke us up, returning to sleep\n"); @@ -276,6 +339,7 @@ int main(int argc, char **argv) while (now < next_leap + 2) { char buf[26]; struct timespec tai; + int ret; tx.modes = 0; ret = adjtimex(&tx); @@ -308,8 +372,13 @@ int main(int argc, char **argv) /* Note if kernel has known hrtimer failure */ test_hrtimer_failure(); - printf("Leap complete\n\n"); - + printf("Leap complete\n"); + if (error_found) { + printf("Errors observed\n"); + clear_time_state(); + return ksft_exit_fail(); + } + printf("\n"); if ((iterations != -1) && !(--iterations)) break; } diff --git a/kernel/tools/testing/selftests/timers/nanosleep.c b/kernel/tools/testing/selftests/timers/nanosleep.c index 8a3c29de7..ff942ff7c 100644 --- a/kernel/tools/testing/selftests/timers/nanosleep.c +++ b/kernel/tools/testing/selftests/timers/nanosleep.c @@ -19,6 +19,7 @@ * GNU General Public License for more details. */ +#include <errno.h> #include <stdio.h> #include <stdlib.h> #include <time.h> diff --git a/kernel/tools/testing/selftests/timers/rtctest.c b/kernel/tools/testing/selftests/timers/rtctest.c index d80ae8523..624bce51b 100644 --- a/kernel/tools/testing/selftests/timers/rtctest.c +++ b/kernel/tools/testing/selftests/timers/rtctest.c @@ -61,7 +61,7 @@ int main(int argc, char **argv) /* Turn on update interrupts (one per second) */ retval = ioctl(fd, RTC_UIE_ON, 0); if (retval == -1) { - if (errno == ENOTTY) { + if (errno == EINVAL) { fprintf(stderr, "\n...Update IRQs not supported.\n"); goto test_READ; diff --git a/kernel/tools/testing/selftests/vm/Makefile b/kernel/tools/testing/selftests/vm/Makefile index a5ce9534e..e4bb1de1d 100644 --- a/kernel/tools/testing/selftests/vm/Makefile +++ b/kernel/tools/testing/selftests/vm/Makefile @@ -1,12 +1,24 @@ # Makefile for vm selftests -CFLAGS = -Wall -BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest +CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) +BINARIES = compaction_test +BINARIES += hugepage-mmap +BINARIES += hugepage-shm +BINARIES += map_hugetlb +BINARIES += mlock2-tests +BINARIES += on-fault-limit +BINARIES += thuge-gen BINARIES += transhuge-stress +BINARIES += userfaultfd all: $(BINARIES) %: %.c $(CC) $(CFLAGS) -o $@ $^ -lrt +userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h + $(CC) $(CFLAGS) -O2 -o $@ $< -lpthread + +../../../../usr/include/linux/kernel.h: + make -C ../../../.. headers_install TEST_PROGS := run_vmtests TEST_FILES := $(BINARIES) diff --git a/kernel/tools/testing/selftests/vm/compaction_test.c b/kernel/tools/testing/selftests/vm/compaction_test.c new file mode 100644 index 000000000..932ff577f --- /dev/null +++ b/kernel/tools/testing/selftests/vm/compaction_test.c @@ -0,0 +1,225 @@ +/* + * + * A test for the patch "Allow compaction of unevictable pages". + * With this patch we should be able to allocate at least 1/4 + * of RAM in huge pages. Without the patch much less is + * allocated. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> + +#define MAP_SIZE 1048576 + +struct map_list { + void *map; + struct map_list *next; +}; + +int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) +{ + char buffer[256] = {0}; + char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'"; + FILE *cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { + perror("Failed to read meminfo\n"); + return -1; + } + + pclose(cmdfile); + + *memfree = atoll(buffer); + cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'"; + cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { + perror("Failed to read meminfo\n"); + return -1; + } + + pclose(cmdfile); + *hugepagesize = atoll(buffer); + + return 0; +} + +int prereq(void) +{ + char allowed; + int fd; + + fd = open("/proc/sys/vm/compact_unevictable_allowed", + O_RDONLY | O_NONBLOCK); + if (fd < 0) { + perror("Failed to open\n" + "/proc/sys/vm/compact_unevictable_allowed\n"); + return -1; + } + + if (read(fd, &allowed, sizeof(char)) != sizeof(char)) { + perror("Failed to read from\n" + "/proc/sys/vm/compact_unevictable_allowed\n"); + close(fd); + return -1; + } + + close(fd); + if (allowed == '1') + return 0; + + return -1; +} + +int check_compaction(unsigned long mem_free, unsigned int hugepage_size) +{ + int fd; + int compaction_index = 0; + char initial_nr_hugepages[10] = {0}; + char nr_hugepages[10] = {0}; + + /* We want to test with 80% of available memory. Else, OOM killer comes + in to play */ + mem_free = mem_free * 0.8; + + fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); + if (fd < 0) { + perror("Failed to open /proc/sys/vm/nr_hugepages"); + return -1; + } + + if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { + perror("Failed to read from /proc/sys/vm/nr_hugepages"); + goto close_fd; + } + + /* Start with the initial condition of 0 huge pages*/ + if (write(fd, "0", sizeof(char)) != sizeof(char)) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + /* Request a large number of huge pages. The Kernel will allocate + as much as it can */ + if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { + perror("Failed to read from /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + /* We should have been able to request at least 1/3 rd of the memory in + huge pages */ + compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); + + if (compaction_index > 3) { + printf("No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n" + "as huge pages\n", compaction_index); + goto close_fd; + } + + printf("No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + + if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) + != strlen(initial_nr_hugepages)) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + close(fd); + return 0; + + close_fd: + close(fd); + printf("Not OK. Compaction test failed."); + return -1; +} + + +int main(int argc, char **argv) +{ + struct rlimit lim; + struct map_list *list, *entry; + size_t page_size, i; + void *map = NULL; + unsigned long mem_free = 0; + unsigned long hugepage_size = 0; + unsigned long mem_fragmentable = 0; + + if (prereq() != 0) { + printf("Either the sysctl compact_unevictable_allowed is not\n" + "set to 1 or couldn't read the proc file.\n" + "Skipping the test\n"); + return 0; + } + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; + if (setrlimit(RLIMIT_MEMLOCK, &lim)) { + perror("Failed to set rlimit:\n"); + return -1; + } + + page_size = getpagesize(); + + list = NULL; + + if (read_memory_info(&mem_free, &hugepage_size) != 0) { + printf("ERROR: Cannot read meminfo\n"); + return -1; + } + + mem_fragmentable = mem_free * 0.8 / 1024; + + while (mem_fragmentable > 0) { + map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); + if (map == MAP_FAILED) + break; + + entry = malloc(sizeof(struct map_list)); + if (!entry) { + munmap(map, MAP_SIZE); + break; + } + entry->map = map; + entry->next = list; + list = entry; + + /* Write something (in this case the address of the map) to + * ensure that KSM can't merge the mapped pages + */ + for (i = 0; i < MAP_SIZE; i += page_size) + *(unsigned long *)(map + i) = (unsigned long)map + i; + + mem_fragmentable--; + } + + for (entry = list; entry != NULL; entry = entry->next) { + munmap(entry->map, MAP_SIZE); + if (!entry->next) + break; + entry = entry->next; + } + + if (check_compaction(mem_free, hugepage_size) == 0) + return 0; + + return -1; +} diff --git a/kernel/tools/testing/selftests/vm/hugetlbfstest.c b/kernel/tools/testing/selftests/vm/hugetlbfstest.c deleted file mode 100644 index 02e1072ec..000000000 --- a/kernel/tools/testing/selftests/vm/hugetlbfstest.c +++ /dev/null @@ -1,86 +0,0 @@ -#define _GNU_SOURCE -#include <assert.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> - -typedef unsigned long long u64; - -static size_t length = 1 << 24; - -static u64 read_rss(void) -{ - char buf[4096], *s = buf; - int i, fd; - u64 rss; - - fd = open("/proc/self/statm", O_RDONLY); - assert(fd > 2); - memset(buf, 0, sizeof(buf)); - read(fd, buf, sizeof(buf) - 1); - for (i = 0; i < 1; i++) - s = strchr(s, ' ') + 1; - rss = strtoull(s, NULL, 10); - return rss << 12; /* assumes 4k pagesize */ -} - -static void do_mmap(int fd, int extra_flags, int unmap) -{ - int *p; - int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags; - u64 before, after; - int ret; - - before = read_rss(); - p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0); - assert(p != MAP_FAILED || - !"mmap returned an unexpected error"); - after = read_rss(); - assert(llabs(after - before - length) < 0x40000 || - !"rss didn't grow as expected"); - if (!unmap) - return; - ret = munmap(p, length); - assert(!ret || !"munmap returned an unexpected error"); - after = read_rss(); - assert(llabs(after - before) < 0x40000 || - !"rss didn't shrink as expected"); -} - -static int open_file(const char *path) -{ - int fd, err; - - unlink(path); - fd = open(path, O_CREAT | O_RDWR | O_TRUNC | O_EXCL - | O_LARGEFILE | O_CLOEXEC, 0600); - assert(fd > 2); - unlink(path); - err = ftruncate(fd, length); - assert(!err); - return fd; -} - -int main(void) -{ - int hugefd, fd; - - fd = open_file("/dev/shm/hugetlbhog"); - hugefd = open_file("/hugepages/hugetlbhog"); - - system("echo 100 > /proc/sys/vm/nr_hugepages"); - do_mmap(-1, MAP_ANONYMOUS, 1); - do_mmap(fd, 0, 1); - do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 1); - do_mmap(hugefd, 0, 1); - do_mmap(hugefd, MAP_HUGETLB, 1); - /* Leak the last one to test do_exit() */ - do_mmap(-1, MAP_ANONYMOUS | MAP_HUGETLB, 0); - printf("oll korrekt.\n"); - return 0; -} diff --git a/kernel/tools/testing/selftests/vm/mlock2-tests.c b/kernel/tools/testing/selftests/vm/mlock2-tests.c new file mode 100644 index 000000000..02ca5e017 --- /dev/null +++ b/kernel/tools/testing/selftests/vm/mlock2-tests.c @@ -0,0 +1,737 @@ +#define _GNU_SOURCE +#include <sys/mman.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <syscall.h> +#include <errno.h> +#include <stdbool.h> + +#ifndef MLOCK_ONFAULT +#define MLOCK_ONFAULT 1 +#endif + +#ifndef MCL_ONFAULT +#define MCL_ONFAULT (MCL_FUTURE << 1) +#endif + +static int mlock2_(void *start, size_t len, int flags) +{ +#ifdef __NR_mlock2 + return syscall(__NR_mlock2, start, len, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + +struct vm_boundaries { + unsigned long start; + unsigned long end; +}; + +static int get_vm_area(unsigned long addr, struct vm_boundaries *area) +{ + FILE *file; + int ret = 1; + char line[1024] = {0}; + char *end_addr; + char *stop; + unsigned long start; + unsigned long end; + + if (!area) + return ret; + + file = fopen("/proc/self/maps", "r"); + if (!file) { + perror("fopen"); + return ret; + } + + memset(area, 0, sizeof(struct vm_boundaries)); + + while(fgets(line, 1024, file)) { + end_addr = strchr(line, '-'); + if (!end_addr) { + printf("cannot parse /proc/self/maps\n"); + goto out; + } + *end_addr = '\0'; + end_addr++; + stop = strchr(end_addr, ' '); + if (!stop) { + printf("cannot parse /proc/self/maps\n"); + goto out; + } + stop = '\0'; + + sscanf(line, "%lx", &start); + sscanf(end_addr, "%lx", &end); + + if (start <= addr && end > addr) { + area->start = start; + area->end = end; + ret = 0; + goto out; + } + } +out: + fclose(file); + return ret; +} + +static uint64_t get_pageflags(unsigned long addr) +{ + FILE *file; + uint64_t pfn; + unsigned long offset; + + file = fopen("/proc/self/pagemap", "r"); + if (!file) { + perror("fopen pagemap"); + _exit(1); + } + + offset = addr / getpagesize() * sizeof(pfn); + + if (fseek(file, offset, SEEK_SET)) { + perror("fseek pagemap"); + _exit(1); + } + + if (fread(&pfn, sizeof(pfn), 1, file) != 1) { + perror("fread pagemap"); + _exit(1); + } + + fclose(file); + return pfn; +} + +static uint64_t get_kpageflags(unsigned long pfn) +{ + uint64_t flags; + FILE *file; + + file = fopen("/proc/kpageflags", "r"); + if (!file) { + perror("fopen kpageflags"); + _exit(1); + } + + if (fseek(file, pfn * sizeof(flags), SEEK_SET)) { + perror("fseek kpageflags"); + _exit(1); + } + + if (fread(&flags, sizeof(flags), 1, file) != 1) { + perror("fread kpageflags"); + _exit(1); + } + + fclose(file); + return flags; +} + +static FILE *seek_to_smaps_entry(unsigned long addr) +{ + FILE *file; + char *line = NULL; + size_t size = 0; + unsigned long start, end; + char perms[5]; + unsigned long offset; + char dev[32]; + unsigned long inode; + char path[BUFSIZ]; + + file = fopen("/proc/self/smaps", "r"); + if (!file) { + perror("fopen smaps"); + _exit(1); + } + + while (getline(&line, &size, file) > 0) { + if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n", + &start, &end, perms, &offset, dev, &inode, path) < 6) + goto next; + + if (start <= addr && addr < end) + goto out; + +next: + free(line); + line = NULL; + size = 0; + } + + fclose(file); + file = NULL; + +out: + free(line); + return file; +} + +#define VMFLAGS "VmFlags:" + +static bool is_vmflag_set(unsigned long addr, const char *vmflag) +{ + char *line = NULL; + char *flags; + size_t size = 0; + bool ret = false; + FILE *smaps; + + smaps = seek_to_smaps_entry(addr); + if (!smaps) { + printf("Unable to parse /proc/self/smaps\n"); + goto out; + } + + while (getline(&line, &size, smaps) > 0) { + if (!strstr(line, VMFLAGS)) { + free(line); + line = NULL; + size = 0; + continue; + } + + flags = line + strlen(VMFLAGS); + ret = (strstr(flags, vmflag) != NULL); + goto out; + } + +out: + free(line); + fclose(smaps); + return ret; +} + +#define SIZE "Size:" +#define RSS "Rss:" +#define LOCKED "lo" + +static bool is_vma_lock_on_fault(unsigned long addr) +{ + bool ret = false; + bool locked; + FILE *smaps = NULL; + unsigned long vma_size, vma_rss; + char *line = NULL; + char *value; + size_t size = 0; + + locked = is_vmflag_set(addr, LOCKED); + if (!locked) + goto out; + + smaps = seek_to_smaps_entry(addr); + if (!smaps) { + printf("Unable to parse /proc/self/smaps\n"); + goto out; + } + + while (getline(&line, &size, smaps) > 0) { + if (!strstr(line, SIZE)) { + free(line); + line = NULL; + size = 0; + continue; + } + + value = line + strlen(SIZE); + if (sscanf(value, "%lu kB", &vma_size) < 1) { + printf("Unable to parse smaps entry for Size\n"); + goto out; + } + break; + } + + while (getline(&line, &size, smaps) > 0) { + if (!strstr(line, RSS)) { + free(line); + line = NULL; + size = 0; + continue; + } + + value = line + strlen(RSS); + if (sscanf(value, "%lu kB", &vma_rss) < 1) { + printf("Unable to parse smaps entry for Rss\n"); + goto out; + } + break; + } + + ret = locked && (vma_rss < vma_size); +out: + free(line); + if (smaps) + fclose(smaps); + return ret; +} + +#define PRESENT_BIT 0x8000000000000000ULL +#define PFN_MASK 0x007FFFFFFFFFFFFFULL +#define UNEVICTABLE_BIT (1UL << 18) + +static int lock_check(char *map) +{ + unsigned long page_size = getpagesize(); + uint64_t page1_flags, page2_flags; + + page1_flags = get_pageflags((unsigned long)map); + page2_flags = get_pageflags((unsigned long)map + page_size); + + /* Both pages should be present */ + if (((page1_flags & PRESENT_BIT) == 0) || + ((page2_flags & PRESENT_BIT) == 0)) { + printf("Failed to make both pages present\n"); + return 1; + } + + page1_flags = get_kpageflags(page1_flags & PFN_MASK); + page2_flags = get_kpageflags(page2_flags & PFN_MASK); + + /* Both pages should be unevictable */ + if (((page1_flags & UNEVICTABLE_BIT) == 0) || + ((page2_flags & UNEVICTABLE_BIT) == 0)) { + printf("Failed to make both pages unevictable\n"); + return 1; + } + + if (!is_vmflag_set((unsigned long)map, LOCKED)) { + printf("VMA flag %s is missing on page 1\n", LOCKED); + return 1; + } + + if (!is_vmflag_set((unsigned long)map + page_size, LOCKED)) { + printf("VMA flag %s is missing on page 2\n", LOCKED); + return 1; + } + + return 0; +} + +static int unlock_lock_check(char *map) +{ + unsigned long page_size = getpagesize(); + uint64_t page1_flags, page2_flags; + + page1_flags = get_pageflags((unsigned long)map); + page2_flags = get_pageflags((unsigned long)map + page_size); + page1_flags = get_kpageflags(page1_flags & PFN_MASK); + page2_flags = get_kpageflags(page2_flags & PFN_MASK); + + if ((page1_flags & UNEVICTABLE_BIT) || (page2_flags & UNEVICTABLE_BIT)) { + printf("A page is still marked unevictable after unlock\n"); + return 1; + } + + if (is_vmflag_set((unsigned long)map, LOCKED)) { + printf("VMA flag %s is present on page 1 after unlock\n", LOCKED); + return 1; + } + + if (is_vmflag_set((unsigned long)map + page_size, LOCKED)) { + printf("VMA flag %s is present on page 2 after unlock\n", LOCKED); + return 1; + } + + return 0; +} + +static int test_mlock_lock() +{ + char *map; + int ret = 1; + unsigned long page_size = getpagesize(); + + map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (map == MAP_FAILED) { + perror("test_mlock_locked mmap"); + goto out; + } + + if (mlock2_(map, 2 * page_size, 0)) { + if (errno == ENOSYS) { + printf("Cannot call new mlock family, skipping test\n"); + _exit(0); + } + perror("mlock2(0)"); + goto unmap; + } + + if (lock_check(map)) + goto unmap; + + /* Now unlock and recheck attributes */ + if (munlock(map, 2 * page_size)) { + perror("munlock()"); + goto unmap; + } + + ret = unlock_lock_check(map); + +unmap: + munmap(map, 2 * page_size); +out: + return ret; +} + +static int onfault_check(char *map) +{ + unsigned long page_size = getpagesize(); + uint64_t page1_flags, page2_flags; + + page1_flags = get_pageflags((unsigned long)map); + page2_flags = get_pageflags((unsigned long)map + page_size); + + /* Neither page should be present */ + if ((page1_flags & PRESENT_BIT) || (page2_flags & PRESENT_BIT)) { + printf("Pages were made present by MLOCK_ONFAULT\n"); + return 1; + } + + *map = 'a'; + page1_flags = get_pageflags((unsigned long)map); + page2_flags = get_pageflags((unsigned long)map + page_size); + + /* Only page 1 should be present */ + if ((page1_flags & PRESENT_BIT) == 0) { + printf("Page 1 is not present after fault\n"); + return 1; + } else if (page2_flags & PRESENT_BIT) { + printf("Page 2 was made present\n"); + return 1; + } + + page1_flags = get_kpageflags(page1_flags & PFN_MASK); + + /* Page 1 should be unevictable */ + if ((page1_flags & UNEVICTABLE_BIT) == 0) { + printf("Failed to make faulted page unevictable\n"); + return 1; + } + + if (!is_vma_lock_on_fault((unsigned long)map)) { + printf("VMA is not marked for lock on fault\n"); + return 1; + } + + if (!is_vma_lock_on_fault((unsigned long)map + page_size)) { + printf("VMA is not marked for lock on fault\n"); + return 1; + } + + return 0; +} + +static int unlock_onfault_check(char *map) +{ + unsigned long page_size = getpagesize(); + uint64_t page1_flags; + + page1_flags = get_pageflags((unsigned long)map); + page1_flags = get_kpageflags(page1_flags & PFN_MASK); + + if (page1_flags & UNEVICTABLE_BIT) { + printf("Page 1 is still marked unevictable after unlock\n"); + return 1; + } + + if (is_vma_lock_on_fault((unsigned long)map) || + is_vma_lock_on_fault((unsigned long)map + page_size)) { + printf("VMA is still lock on fault after unlock\n"); + return 1; + } + + return 0; +} + +static int test_mlock_onfault() +{ + char *map; + int ret = 1; + unsigned long page_size = getpagesize(); + + map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (map == MAP_FAILED) { + perror("test_mlock_locked mmap"); + goto out; + } + + if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { + if (errno == ENOSYS) { + printf("Cannot call new mlock family, skipping test\n"); + _exit(0); + } + perror("mlock2(MLOCK_ONFAULT)"); + goto unmap; + } + + if (onfault_check(map)) + goto unmap; + + /* Now unlock and recheck attributes */ + if (munlock(map, 2 * page_size)) { + if (errno == ENOSYS) { + printf("Cannot call new mlock family, skipping test\n"); + _exit(0); + } + perror("munlock()"); + goto unmap; + } + + ret = unlock_onfault_check(map); +unmap: + munmap(map, 2 * page_size); +out: + return ret; +} + +static int test_lock_onfault_of_present() +{ + char *map; + int ret = 1; + unsigned long page_size = getpagesize(); + uint64_t page1_flags, page2_flags; + + map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (map == MAP_FAILED) { + perror("test_mlock_locked mmap"); + goto out; + } + + *map = 'a'; + + if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { + if (errno == ENOSYS) { + printf("Cannot call new mlock family, skipping test\n"); + _exit(0); + } + perror("mlock2(MLOCK_ONFAULT)"); + goto unmap; + } + + page1_flags = get_pageflags((unsigned long)map); + page2_flags = get_pageflags((unsigned long)map + page_size); + page1_flags = get_kpageflags(page1_flags & PFN_MASK); + page2_flags = get_kpageflags(page2_flags & PFN_MASK); + + /* Page 1 should be unevictable */ + if ((page1_flags & UNEVICTABLE_BIT) == 0) { + printf("Failed to make present page unevictable\n"); + goto unmap; + } + + if (!is_vma_lock_on_fault((unsigned long)map) || + !is_vma_lock_on_fault((unsigned long)map + page_size)) { + printf("VMA with present pages is not marked lock on fault\n"); + goto unmap; + } + ret = 0; +unmap: + munmap(map, 2 * page_size); +out: + return ret; +} + +static int test_munlockall() +{ + char *map; + int ret = 1; + unsigned long page_size = getpagesize(); + + map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + + if (map == MAP_FAILED) { + perror("test_munlockall mmap"); + goto out; + } + + if (mlockall(MCL_CURRENT)) { + perror("mlockall(MCL_CURRENT)"); + goto out; + } + + if (lock_check(map)) + goto unmap; + + if (munlockall()) { + perror("munlockall()"); + goto unmap; + } + + if (unlock_lock_check(map)) + goto unmap; + + munmap(map, 2 * page_size); + + map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + + if (map == MAP_FAILED) { + perror("test_munlockall second mmap"); + goto out; + } + + if (mlockall(MCL_CURRENT | MCL_ONFAULT)) { + perror("mlockall(MCL_CURRENT | MCL_ONFAULT)"); + goto unmap; + } + + if (onfault_check(map)) + goto unmap; + + if (munlockall()) { + perror("munlockall()"); + goto unmap; + } + + if (unlock_onfault_check(map)) + goto unmap; + + if (mlockall(MCL_CURRENT | MCL_FUTURE)) { + perror("mlockall(MCL_CURRENT | MCL_FUTURE)"); + goto out; + } + + if (lock_check(map)) + goto unmap; + + if (munlockall()) { + perror("munlockall()"); + goto unmap; + } + + ret = unlock_lock_check(map); + +unmap: + munmap(map, 2 * page_size); +out: + munlockall(); + return ret; +} + +static int test_vma_management(bool call_mlock) +{ + int ret = 1; + void *map; + unsigned long page_size = getpagesize(); + struct vm_boundaries page1; + struct vm_boundaries page2; + struct vm_boundaries page3; + + map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); + if (map == MAP_FAILED) { + perror("mmap()"); + return ret; + } + + if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) { + if (errno == ENOSYS) { + printf("Cannot call new mlock family, skipping test\n"); + _exit(0); + } + perror("mlock(ONFAULT)\n"); + goto out; + } + + if (get_vm_area((unsigned long)map, &page1) || + get_vm_area((unsigned long)map + page_size, &page2) || + get_vm_area((unsigned long)map + page_size * 2, &page3)) { + printf("couldn't find mapping in /proc/self/maps\n"); + goto out; + } + + /* + * Before we unlock a portion, we need to that all three pages are in + * the same VMA. If they are not we abort this test (Note that this is + * not a failure) + */ + if (page1.start != page2.start || page2.start != page3.start) { + printf("VMAs are not merged to start, aborting test\n"); + ret = 0; + goto out; + } + + if (munlock(map + page_size, page_size)) { + perror("munlock()"); + goto out; + } + + if (get_vm_area((unsigned long)map, &page1) || + get_vm_area((unsigned long)map + page_size, &page2) || + get_vm_area((unsigned long)map + page_size * 2, &page3)) { + printf("couldn't find mapping in /proc/self/maps\n"); + goto out; + } + + /* All three VMAs should be different */ + if (page1.start == page2.start || page2.start == page3.start) { + printf("failed to split VMA for munlock\n"); + goto out; + } + + /* Now unlock the first and third page and check the VMAs again */ + if (munlock(map, page_size * 3)) { + perror("munlock()"); + goto out; + } + + if (get_vm_area((unsigned long)map, &page1) || + get_vm_area((unsigned long)map + page_size, &page2) || + get_vm_area((unsigned long)map + page_size * 2, &page3)) { + printf("couldn't find mapping in /proc/self/maps\n"); + goto out; + } + + /* Now all three VMAs should be the same */ + if (page1.start != page2.start || page2.start != page3.start) { + printf("failed to merge VMAs after munlock\n"); + goto out; + } + + ret = 0; +out: + munmap(map, 3 * page_size); + return ret; +} + +static int test_mlockall(int (test_function)(bool call_mlock)) +{ + int ret = 1; + + if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) { + perror("mlockall"); + return ret; + } + + ret = test_function(false); + munlockall(); + return ret; +} + +int main(int argc, char **argv) +{ + int ret = 0; + ret += test_mlock_lock(); + ret += test_mlock_onfault(); + ret += test_munlockall(); + ret += test_lock_onfault_of_present(); + ret += test_vma_management(true); + ret += test_mlockall(test_vma_management); + return ret; +} diff --git a/kernel/tools/testing/selftests/vm/on-fault-limit.c b/kernel/tools/testing/selftests/vm/on-fault-limit.c new file mode 100644 index 000000000..245acccce --- /dev/null +++ b/kernel/tools/testing/selftests/vm/on-fault-limit.c @@ -0,0 +1,47 @@ +#include <sys/mman.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include <sys/time.h> +#include <sys/resource.h> + +#ifndef MCL_ONFAULT +#define MCL_ONFAULT (MCL_FUTURE << 1) +#endif + +static int test_limit(void) +{ + int ret = 1; + struct rlimit lims; + void *map; + + if (getrlimit(RLIMIT_MEMLOCK, &lims)) { + perror("getrlimit"); + return ret; + } + + if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) { + perror("mlockall"); + return ret; + } + + map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, 0, 0); + if (map != MAP_FAILED) + printf("mmap should have failed, but didn't\n"); + else { + ret = 0; + munmap(map, 2 * lims.rlim_max); + } + + munlockall(); + return ret; +} + +int main(int argc, char **argv) +{ + int ret = 0; + + ret += test_limit(); + return ret; +} diff --git a/kernel/tools/testing/selftests/vm/run_vmtests b/kernel/tools/testing/selftests/vm/run_vmtests index c87b68123..e11968b36 100755 --- a/kernel/tools/testing/selftests/vm/run_vmtests +++ b/kernel/tools/testing/selftests/vm/run_vmtests @@ -20,13 +20,26 @@ done < /proc/meminfo if [ -n "$freepgs" ] && [ -n "$pgsize" ]; then nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` needpgs=`expr $needmem / $pgsize` - if [ $freepgs -lt $needpgs ]; then + tries=2 + while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do lackpgs=$(( $needpgs - $freepgs )) + echo 3 > /proc/sys/vm/drop_caches echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages if [ $? -ne 0 ]; then echo "Please run this test as root" exit 1 fi + while read name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs=$size + fi + done < /proc/meminfo + tries=$((tries - 1)) + done + if [ $freepgs -lt $needpgs ]; then + printf "Not enough huge pages available (%d < %d)\n" \ + $freepgs $needpgs + exit 1 fi else echo "no hugetlbfs support in kernel?" @@ -75,10 +88,14 @@ else echo "[PASS]" fi +echo "NOTE: The above hugetlb tests provide minimal coverage. Use" +echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" +echo " hugetlb regression testing." + echo "--------------------" -echo "running hugetlbfstest" +echo "running userfaultfd" echo "--------------------" -./hugetlbfstest +./userfaultfd 128 32 if [ $? -ne 0 ]; then echo "[FAIL]" exitcode=1 @@ -90,4 +107,38 @@ fi umount $mnt rm -rf $mnt echo $nr_hugepgs > /proc/sys/vm/nr_hugepages + +echo "-----------------------" +echo "running compaction_test" +echo "-----------------------" +./compaction_test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "--------------------" +echo "running on-fault-limit" +echo "--------------------" +sudo -u nobody ./on-fault-limit +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "--------------------" +echo "running mlock2-tests" +echo "--------------------" +./mlock2-tests +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + exit $exitcode diff --git a/kernel/tools/testing/selftests/vm/userfaultfd.c b/kernel/tools/testing/selftests/vm/userfaultfd.c new file mode 100644 index 000000000..d77ed41b2 --- /dev/null +++ b/kernel/tools/testing/selftests/vm/userfaultfd.c @@ -0,0 +1,645 @@ +/* + * Stress userfaultfd syscall. + * + * Copyright (C) 2015 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * This test allocates two virtual areas and bounces the physical + * memory across the two virtual areas (from area_src to area_dst) + * using userfaultfd. + * + * There are three threads running per CPU: + * + * 1) one per-CPU thread takes a per-page pthread_mutex in a random + * page of the area_dst (while the physical page may still be in + * area_src), and increments a per-page counter in the same page, + * and checks its value against a verification region. + * + * 2) another per-CPU thread handles the userfaults generated by + * thread 1 above. userfaultfd blocking reads or poll() modes are + * exercised interleaved. + * + * 3) one last per-CPU thread transfers the memory in the background + * at maximum bandwidth (if not already transferred by thread + * 2). Each cpu thread takes cares of transferring a portion of the + * area. + * + * When all threads of type 3 completed the transfer, one bounce is + * complete. area_src and area_dst are then swapped. All threads are + * respawned and so the bounce is immediately restarted in the + * opposite direction. + * + * per-CPU threads 1 by triggering userfaults inside + * pthread_mutex_lock will also verify the atomicity of the memory + * transfer (UFFDIO_COPY). + * + * The program takes two parameters: the amounts of physical memory in + * megabytes (MiB) of the area and the number of bounces to execute. + * + * # 100MiB 99999 bounces + * ./userfaultfd 100 99999 + * + * # 1GiB 99 bounces + * ./userfaultfd 1000 99 + * + * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers + * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> +#include <signal.h> +#include <poll.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/ioctl.h> +#include <pthread.h> +#include <linux/userfaultfd.h> + +#ifdef __NR_userfaultfd + +static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; + +#define BOUNCE_RANDOM (1<<0) +#define BOUNCE_RACINGFAULTS (1<<1) +#define BOUNCE_VERIFY (1<<2) +#define BOUNCE_POLL (1<<3) +static int bounces; + +static unsigned long long *count_verify; +static int uffd, finished, *pipefd; +static char *area_src, *area_dst; +static char *zeropage; +pthread_attr_t attr; + +/* pthread_mutex_t starts at page offset 0 */ +#define area_mutex(___area, ___nr) \ + ((pthread_mutex_t *) ((___area) + (___nr)*page_size)) +/* + * count is placed in the page after pthread_mutex_t naturally aligned + * to avoid non alignment faults on non-x86 archs. + */ +#define area_count(___area, ___nr) \ + ((volatile unsigned long long *) ((unsigned long) \ + ((___area) + (___nr)*page_size + \ + sizeof(pthread_mutex_t) + \ + sizeof(unsigned long long) - 1) & \ + ~(unsigned long)(sizeof(unsigned long long) \ + - 1))) + +static int my_bcmp(char *str1, char *str2, size_t n) +{ + unsigned long i; + for (i = 0; i < n; i++) + if (str1[i] != str2[i]) + return 1; + return 0; +} + +static void *locking_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + struct random_data rand; + unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */ + int32_t rand_nr; + unsigned long long count; + char randstate[64]; + unsigned int seed; + time_t start; + + if (bounces & BOUNCE_RANDOM) { + seed = (unsigned int) time(NULL) - bounces; + if (!(bounces & BOUNCE_RACINGFAULTS)) + seed += cpu; + bzero(&rand, sizeof(rand)); + bzero(&randstate, sizeof(randstate)); + if (initstate_r(seed, randstate, sizeof(randstate), &rand)) + fprintf(stderr, "srandom_r error\n"), exit(1); + } else { + page_nr = -bounces; + if (!(bounces & BOUNCE_RACINGFAULTS)) + page_nr += cpu * nr_pages_per_cpu; + } + + while (!finished) { + if (bounces & BOUNCE_RANDOM) { + if (random_r(&rand, &rand_nr)) + fprintf(stderr, "random_r 1 error\n"), exit(1); + page_nr = rand_nr; + if (sizeof(page_nr) > sizeof(rand_nr)) { + if (random_r(&rand, &rand_nr)) + fprintf(stderr, "random_r 2 error\n"), exit(1); + page_nr |= (((unsigned long) rand_nr) << 16) << + 16; + } + } else + page_nr += 1; + page_nr %= nr_pages; + + start = time(NULL); + if (bounces & BOUNCE_VERIFY) { + count = *area_count(area_dst, page_nr); + if (!count) + fprintf(stderr, + "page_nr %lu wrong count %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); + + + /* + * We can't use bcmp (or memcmp) because that + * returns 0 erroneously if the memory is + * changing under it (even if the end of the + * page is never changing and always + * different). + */ +#if 1 + if (!my_bcmp(area_dst + page_nr * page_size, zeropage, + page_size)) + fprintf(stderr, + "my_bcmp page_nr %lu wrong count %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); +#else + unsigned long loops; + + loops = 0; + /* uncomment the below line to test with mutex */ + /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */ + while (!bcmp(area_dst + page_nr * page_size, zeropage, + page_size)) { + loops += 1; + if (loops > 10) + break; + } + /* uncomment below line to test with mutex */ + /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */ + if (loops) { + fprintf(stderr, + "page_nr %lu all zero thread %lu %p %lu\n", + page_nr, cpu, area_dst + page_nr * page_size, + loops); + if (loops > 10) + exit(1); + } +#endif + } + + pthread_mutex_lock(area_mutex(area_dst, page_nr)); + count = *area_count(area_dst, page_nr); + if (count != count_verify[page_nr]) { + fprintf(stderr, + "page_nr %lu memory corruption %Lu %Lu\n", + page_nr, count, + count_verify[page_nr]), exit(1); + } + count++; + *area_count(area_dst, page_nr) = count_verify[page_nr] = count; + pthread_mutex_unlock(area_mutex(area_dst, page_nr)); + + if (time(NULL) - start > 1) + fprintf(stderr, + "userfault too slow %ld " + "possible false positive with overcommit\n", + time(NULL) - start); + } + + return NULL; +} + +static int copy_page(unsigned long offset) +{ + struct uffdio_copy uffdio_copy; + + if (offset >= nr_pages * page_size) + fprintf(stderr, "unexpected offset %lu\n", + offset), exit(1); + uffdio_copy.dst = (unsigned long) area_dst + offset; + uffdio_copy.src = (unsigned long) area_src + offset; + uffdio_copy.len = page_size; + uffdio_copy.mode = 0; + uffdio_copy.copy = 0; + if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) { + /* real retval in ufdio_copy.copy */ + if (uffdio_copy.copy != -EEXIST) + fprintf(stderr, "UFFDIO_COPY error %Ld\n", + uffdio_copy.copy), exit(1); + } else if (uffdio_copy.copy != page_size) { + fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", + uffdio_copy.copy), exit(1); + } else + return 1; + return 0; +} + +static void *uffd_poll_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + struct pollfd pollfd[2]; + struct uffd_msg msg; + int ret; + unsigned long offset; + char tmp_chr; + unsigned long userfaults = 0; + + pollfd[0].fd = uffd; + pollfd[0].events = POLLIN; + pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].events = POLLIN; + + for (;;) { + ret = poll(pollfd, 2, -1); + if (!ret) + fprintf(stderr, "poll error %d\n", ret), exit(1); + if (ret < 0) + perror("poll"), exit(1); + if (pollfd[1].revents & POLLIN) { + if (read(pollfd[1].fd, &tmp_chr, 1) != 1) + fprintf(stderr, "read pipefd error\n"), + exit(1); + break; + } + if (!(pollfd[0].revents & POLLIN)) + fprintf(stderr, "pollfd[0].revents %d\n", + pollfd[0].revents), exit(1); + ret = read(uffd, &msg, sizeof(msg)); + if (ret < 0) { + if (errno == EAGAIN) + continue; + perror("nonblocking read error"), exit(1); + } + if (msg.event != UFFD_EVENT_PAGEFAULT) + fprintf(stderr, "unexpected msg event %u\n", + msg.event), exit(1); + if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + offset = (char *)(unsigned long)msg.arg.pagefault.address - + area_dst; + offset &= ~(page_size-1); + if (copy_page(offset)) + userfaults++; + } + return (void *)userfaults; +} + +pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER; + +static void *uffd_read_thread(void *arg) +{ + unsigned long *this_cpu_userfaults; + struct uffd_msg msg; + unsigned long offset; + int ret; + + this_cpu_userfaults = (unsigned long *) arg; + *this_cpu_userfaults = 0; + + pthread_mutex_unlock(&uffd_read_mutex); + /* from here cancellation is ok */ + + for (;;) { + ret = read(uffd, &msg, sizeof(msg)); + if (ret != sizeof(msg)) { + if (ret < 0) + perror("blocking read error"), exit(1); + else + fprintf(stderr, "short read\n"), exit(1); + } + if (msg.event != UFFD_EVENT_PAGEFAULT) + fprintf(stderr, "unexpected msg event %u\n", + msg.event), exit(1); + if (bounces & BOUNCE_VERIFY && + msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + offset = (char *)(unsigned long)msg.arg.pagefault.address - + area_dst; + offset &= ~(page_size-1); + if (copy_page(offset)) + (*this_cpu_userfaults)++; + } + return (void *)NULL; +} + +static void *background_thread(void *arg) +{ + unsigned long cpu = (unsigned long) arg; + unsigned long page_nr; + + for (page_nr = cpu * nr_pages_per_cpu; + page_nr < (cpu+1) * nr_pages_per_cpu; + page_nr++) + copy_page(page_nr * page_size); + + return NULL; +} + +static int stress(unsigned long *userfaults) +{ + unsigned long cpu; + pthread_t locking_threads[nr_cpus]; + pthread_t uffd_threads[nr_cpus]; + pthread_t background_threads[nr_cpus]; + void **_userfaults = (void **) userfaults; + + finished = 0; + for (cpu = 0; cpu < nr_cpus; cpu++) { + if (pthread_create(&locking_threads[cpu], &attr, + locking_thread, (void *)cpu)) + return 1; + if (bounces & BOUNCE_POLL) { + if (pthread_create(&uffd_threads[cpu], &attr, + uffd_poll_thread, (void *)cpu)) + return 1; + } else { + if (pthread_create(&uffd_threads[cpu], &attr, + uffd_read_thread, + &_userfaults[cpu])) + return 1; + pthread_mutex_lock(&uffd_read_mutex); + } + if (pthread_create(&background_threads[cpu], &attr, + background_thread, (void *)cpu)) + return 1; + } + for (cpu = 0; cpu < nr_cpus; cpu++) + if (pthread_join(background_threads[cpu], NULL)) + return 1; + + /* + * Be strict and immediately zap area_src, the whole area has + * been transferred already by the background treads. The + * area_src could then be faulted in in a racy way by still + * running uffdio_threads reading zeropages after we zapped + * area_src (but they're guaranteed to get -EEXIST from + * UFFDIO_COPY without writing zero pages into area_dst + * because the background threads already completed). + */ + if (madvise(area_src, nr_pages * page_size, MADV_DONTNEED)) { + perror("madvise"); + return 1; + } + + for (cpu = 0; cpu < nr_cpus; cpu++) { + char c; + if (bounces & BOUNCE_POLL) { + if (write(pipefd[cpu*2+1], &c, 1) != 1) { + fprintf(stderr, "pipefd write error\n"); + return 1; + } + if (pthread_join(uffd_threads[cpu], &_userfaults[cpu])) + return 1; + } else { + if (pthread_cancel(uffd_threads[cpu])) + return 1; + if (pthread_join(uffd_threads[cpu], NULL)) + return 1; + } + } + + finished = 1; + for (cpu = 0; cpu < nr_cpus; cpu++) + if (pthread_join(locking_threads[cpu], NULL)) + return 1; + + return 0; +} + +static int userfaultfd_stress(void) +{ + void *area; + char *tmp_area; + unsigned long nr; + struct uffdio_register uffdio_register; + struct uffdio_api uffdio_api; + unsigned long cpu; + int uffd_flags, err; + unsigned long userfaults[nr_cpus]; + + if (posix_memalign(&area, page_size, nr_pages * page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + area_src = area; + if (posix_memalign(&area, page_size, nr_pages * page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + area_dst = area; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + fprintf(stderr, + "userfaultfd syscall not available in this kernel\n"); + return 1; + } + uffd_flags = fcntl(uffd, F_GETFD, NULL); + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { + fprintf(stderr, "UFFDIO_API\n"); + return 1; + } + if (uffdio_api.api != UFFD_API) { + fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api); + return 1; + } + + count_verify = malloc(nr_pages * sizeof(unsigned long long)); + if (!count_verify) { + perror("count_verify"); + return 1; + } + + for (nr = 0; nr < nr_pages; nr++) { + *area_mutex(area_src, nr) = (pthread_mutex_t) + PTHREAD_MUTEX_INITIALIZER; + count_verify[nr] = *area_count(area_src, nr) = 1; + /* + * In the transition between 255 to 256, powerpc will + * read out of order in my_bcmp and see both bytes as + * zero, so leave a placeholder below always non-zero + * after the count, to avoid my_bcmp to trigger false + * positives. + */ + *(area_count(area_src, nr) + 1) = 1; + } + + pipefd = malloc(sizeof(int) * nr_cpus * 2); + if (!pipefd) { + perror("pipefd"); + return 1; + } + for (cpu = 0; cpu < nr_cpus; cpu++) { + if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) { + perror("pipe"); + return 1; + } + } + + if (posix_memalign(&area, page_size, page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + zeropage = area; + bzero(zeropage, page_size); + + pthread_mutex_lock(&uffd_read_mutex); + + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, 16*1024*1024); + + err = 0; + while (bounces--) { + unsigned long expected_ioctls; + + printf("bounces: %d, mode:", bounces); + if (bounces & BOUNCE_RANDOM) + printf(" rnd"); + if (bounces & BOUNCE_RACINGFAULTS) + printf(" racing"); + if (bounces & BOUNCE_VERIFY) + printf(" ver"); + if (bounces & BOUNCE_POLL) + printf(" poll"); + printf(", "); + fflush(stdout); + + if (bounces & BOUNCE_POLL) + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + else + fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK); + + /* register */ + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + return 1; + } + expected_ioctls = (1 << _UFFDIO_WAKE) | + (1 << _UFFDIO_COPY) | + (1 << _UFFDIO_ZEROPAGE); + if ((uffdio_register.ioctls & expected_ioctls) != + expected_ioctls) { + fprintf(stderr, + "unexpected missing ioctl for anon memory\n"); + return 1; + } + + /* + * The madvise done previously isn't enough: some + * uffd_thread could have read userfaults (one of + * those already resolved by the background thread) + * and it may be in the process of calling + * UFFDIO_COPY. UFFDIO_COPY will read the zapped + * area_src and it would map a zero page in it (of + * course such a UFFDIO_COPY is perfectly safe as it'd + * return -EEXIST). The problem comes at the next + * bounce though: that racing UFFDIO_COPY would + * generate zeropages in the area_src, so invalidating + * the previous MADV_DONTNEED. Without this additional + * MADV_DONTNEED those zeropages leftovers in the + * area_src would lead to -EEXIST failure during the + * next bounce, effectively leaving a zeropage in the + * area_dst. + * + * Try to comment this out madvise to see the memory + * corruption being caught pretty quick. + * + * khugepaged is also inhibited to collapse THP after + * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's + * required to MADV_DONTNEED here. + */ + if (madvise(area_dst, nr_pages * page_size, MADV_DONTNEED)) { + perror("madvise 2"); + return 1; + } + + /* bounce pass */ + if (stress(userfaults)) + return 1; + + /* unregister */ + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) { + fprintf(stderr, "register failure\n"); + return 1; + } + + /* verification */ + if (bounces & BOUNCE_VERIFY) { + for (nr = 0; nr < nr_pages; nr++) { + if (*area_count(area_dst, nr) != count_verify[nr]) { + fprintf(stderr, + "error area_count %Lu %Lu %lu\n", + *area_count(area_src, nr), + count_verify[nr], + nr); + err = 1; + bounces = 0; + } + } + } + + /* prepare next bounce */ + tmp_area = area_src; + area_src = area_dst; + area_dst = tmp_area; + + printf("userfaults:"); + for (cpu = 0; cpu < nr_cpus; cpu++) + printf(" %lu", userfaults[cpu]); + printf("\n"); + } + + return err; +} + +int main(int argc, char **argv) +{ + if (argc < 3) + fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + page_size = sysconf(_SC_PAGE_SIZE); + if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 + > page_size) + fprintf(stderr, "Impossible to run this test\n"), exit(2); + nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size / + nr_cpus; + if (!nr_pages_per_cpu) { + fprintf(stderr, "invalid MiB\n"); + fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + } + bounces = atoi(argv[2]); + if (bounces <= 0) { + fprintf(stderr, "invalid bounces\n"); + fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + } + nr_pages = nr_pages_per_cpu * nr_cpus; + printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", + nr_pages, nr_pages_per_cpu); + return userfaultfd_stress(); +} + +#else /* __NR_userfaultfd */ + +#warning "missing __NR_userfaultfd definition" + +int main(void) +{ + printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); + return 0; +} + +#endif /* __NR_userfaultfd */ diff --git a/kernel/tools/testing/selftests/x86/Makefile b/kernel/tools/testing/selftests/x86/Makefile index 9b0d8baf2..eabcff411 100644 --- a/kernel/tools/testing/selftests/x86/Makefile +++ b/kernel/tools/testing/selftests/x86/Makefile @@ -4,8 +4,9 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := sigreturn single_step_syscall -TARGETS_C_32BIT_ONLY := entry_from_vm86 +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall +TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \ + test_FCMOV test_FCOMI test_FISTTP TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) @@ -35,7 +36,7 @@ clean: $(RM) $(BINARIES_32) $(BINARIES_64) $(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c - $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm $(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl @@ -57,3 +58,8 @@ warn_32bit_failure: echo " yum install glibc-devel.*i686"; \ exit 0; endif + +# Some tests have additional dependencies. +sysret_ss_attrs_64: thunks.S +ptrace_syscall_32: raw_syscall_helper_32.S +test_syscall_vdso_32: thunks_32.S diff --git a/kernel/tools/testing/selftests/x86/entry_from_vm86.c b/kernel/tools/testing/selftests/x86/entry_from_vm86.c index 5c38a1876..d075ea0e5 100644 --- a/kernel/tools/testing/selftests/x86/entry_from_vm86.c +++ b/kernel/tools/testing/selftests/x86/entry_from_vm86.c @@ -28,6 +28,55 @@ static unsigned long load_addr = 0x10000; static int nerrs = 0; +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static sig_atomic_t got_signal; + +static void sighandler(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM || + (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) { + printf("[FAIL]\tSignal frame should not reflect vm86 mode\n"); + nerrs++; + } + + const char *signame; + if (sig == SIGSEGV) + signame = "SIGSEGV"; + else if (sig == SIGILL) + signame = "SIGILL"; + else + signame = "unexpected signal"; + + printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame, + (unsigned long)ctx->uc_mcontext.gregs[REG_EFL], + (unsigned short)ctx->uc_mcontext.gregs[REG_CS]); + + got_signal = 1; +} + asm ( ".pushsection .rodata\n\t" ".type vmcode_bound, @object\n\t" @@ -38,6 +87,14 @@ asm ( "int3\n\t" "vmcode_sysenter:\n\t" "sysenter\n\t" + "vmcode_syscall:\n\t" + "syscall\n\t" + "vmcode_sti:\n\t" + "sti\n\t" + "vmcode_int3:\n\t" + "int3\n\t" + "vmcode_int80:\n\t" + "int $0x80\n\t" ".size vmcode, . - vmcode\n\t" "end_vmcode:\n\t" ".code32\n\t" @@ -45,9 +102,12 @@ asm ( ); extern unsigned char vmcode[], end_vmcode[]; -extern unsigned char vmcode_bound[], vmcode_sysenter[]; +extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[], + vmcode_sti[], vmcode_int3[], vmcode_int80[]; -static void do_test(struct vm86plus_struct *v86, unsigned long eip, +/* Returns false if the test was skipped. */ +static bool do_test(struct vm86plus_struct *v86, unsigned long eip, + unsigned int rettype, unsigned int retarg, const char *text) { long ret; @@ -56,9 +116,10 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip, v86->regs.eip = eip; ret = vm86(VM86_ENTER, v86); - if (ret == -1 && errno == ENOSYS) { - printf("[SKIP]\tvm86 not supported\n"); - return; + if (ret == -1 && (errno == ENOSYS || errno == EPERM)) { + printf("[SKIP]\tvm86 %s\n", + errno == ENOSYS ? "not supported" : "not allowed"); + return false; } if (VM86_TYPE(ret) == VM86_INTx) { @@ -73,13 +134,30 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip, else sprintf(trapname, "%d", trapno); - printf("[OK]\tExited vm86 mode due to #%s\n", trapname); + printf("[INFO]\tExited vm86 mode due to #%s\n", trapname); } else if (VM86_TYPE(ret) == VM86_UNKNOWN) { - printf("[OK]\tExited vm86 mode due to unhandled GP fault\n"); + printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n"); + } else if (VM86_TYPE(ret) == VM86_TRAP) { + printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n", + VM86_ARG(ret)); + } else if (VM86_TYPE(ret) == VM86_SIGNAL) { + printf("[INFO]\tExited vm86 mode due to a signal\n"); + } else if (VM86_TYPE(ret) == VM86_STI) { + printf("[INFO]\tExited vm86 mode due to STI\n"); } else { - printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n", + printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n", VM86_TYPE(ret), VM86_ARG(ret)); } + + if (rettype == -1 || + (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) { + printf("[OK]\tReturned correctly\n"); + } else { + printf("[FAIL]\tIncorrect return reason\n"); + nerrs++; + } + + return true; } int main(void) @@ -105,10 +183,56 @@ int main(void) assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */ /* #BR -- should deliver SIG??? */ - do_test(&v86, vmcode_bound - vmcode, "#BR"); + do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR"); + + /* + * SYSENTER -- should cause #GP or #UD depending on CPU. + * Expected return type -1 means that we shouldn't validate + * the vm86 return value. This will avoid problems on non-SEP + * CPUs. + */ + sethandler(SIGILL, sighandler, 0); + do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER"); + clearhandler(SIGILL); + + /* + * SYSCALL would be a disaster in VM86 mode. Fortunately, + * there is no kernel that both enables SYSCALL and sets + * EFER.SCE, so it's #UD on all systems. But vm86 is + * buggy (or has a "feature"), so the SIGILL will actually + * be delivered. + */ + sethandler(SIGILL, sighandler, 0); + do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL"); + clearhandler(SIGILL); + + /* STI with VIP set */ + v86.regs.eflags |= X86_EFLAGS_VIP; + v86.regs.eflags &= ~X86_EFLAGS_IF; + do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set"); + + /* INT3 -- should cause #BP */ + do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3"); + + /* INT80 -- should exit with "INTx 0x80" */ + v86.regs.eax = (unsigned int)-1; + do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80"); + + /* Execute a null pointer */ + v86.regs.cs = 0; + v86.regs.ss = 0; + sethandler(SIGSEGV, sighandler, 0); + got_signal = 0; + if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") && + !got_signal) { + printf("[FAIL]\tDid not receive SIGSEGV\n"); + nerrs++; + } + clearhandler(SIGSEGV); - /* SYSENTER -- should cause #GP or #UD depending on CPU */ - do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER"); + /* Make sure nothing explodes if we fork. */ + if (fork() > 0) + return 0; return (nerrs == 0 ? 0 : 1); } diff --git a/kernel/tools/testing/selftests/x86/ldt_gdt.c b/kernel/tools/testing/selftests/x86/ldt_gdt.c new file mode 100644 index 000000000..31a3035cd --- /dev/null +++ b/kernel/tools/testing/selftests/x86/ldt_gdt.c @@ -0,0 +1,576 @@ +/* + * ldt_gdt.c - Test cases for LDT and GDT access + * Copyright (c) 2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include <err.h> +#include <stdio.h> +#include <stdint.h> +#include <signal.h> +#include <setjmp.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <asm/ldt.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdbool.h> +#include <pthread.h> +#include <sched.h> +#include <linux/futex.h> + +#define AR_ACCESSED (1<<8) + +#define AR_TYPE_RODATA (0 * (1<<9)) +#define AR_TYPE_RWDATA (1 * (1<<9)) +#define AR_TYPE_RODATA_EXPDOWN (2 * (1<<9)) +#define AR_TYPE_RWDATA_EXPDOWN (3 * (1<<9)) +#define AR_TYPE_XOCODE (4 * (1<<9)) +#define AR_TYPE_XRCODE (5 * (1<<9)) +#define AR_TYPE_XOCODE_CONF (6 * (1<<9)) +#define AR_TYPE_XRCODE_CONF (7 * (1<<9)) + +#define AR_DPL3 (3 * (1<<13)) + +#define AR_S (1 << 12) +#define AR_P (1 << 15) +#define AR_AVL (1 << 20) +#define AR_L (1 << 21) +#define AR_DB (1 << 22) +#define AR_G (1 << 23) + +static int nerrs; + +static void check_invalid_segment(uint16_t index, int ldt) +{ + uint32_t has_limit = 0, has_ar = 0, limit, ar; + uint32_t selector = (index << 3) | (ldt << 2) | 3; + + asm ("lsl %[selector], %[limit]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_limit]\n\t" + "1:" + : [limit] "=r" (limit), [has_limit] "+rm" (has_limit) + : [selector] "r" (selector)); + asm ("larl %[selector], %[ar]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_ar]\n\t" + "1:" + : [ar] "=r" (ar), [has_ar] "+rm" (has_ar) + : [selector] "r" (selector)); + + if (has_limit || has_ar) { + printf("[FAIL]\t%s entry %hu is valid but should be invalid\n", + (ldt ? "LDT" : "GDT"), index); + nerrs++; + } else { + printf("[OK]\t%s entry %hu is invalid\n", + (ldt ? "LDT" : "GDT"), index); + } +} + +static void check_valid_segment(uint16_t index, int ldt, + uint32_t expected_ar, uint32_t expected_limit, + bool verbose) +{ + uint32_t has_limit = 0, has_ar = 0, limit, ar; + uint32_t selector = (index << 3) | (ldt << 2) | 3; + + asm ("lsl %[selector], %[limit]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_limit]\n\t" + "1:" + : [limit] "=r" (limit), [has_limit] "+rm" (has_limit) + : [selector] "r" (selector)); + asm ("larl %[selector], %[ar]\n\t" + "jnz 1f\n\t" + "movl $1, %[has_ar]\n\t" + "1:" + : [ar] "=r" (ar), [has_ar] "+rm" (has_ar) + : [selector] "r" (selector)); + + if (!has_limit || !has_ar) { + printf("[FAIL]\t%s entry %hu is invalid but should be valid\n", + (ldt ? "LDT" : "GDT"), index); + nerrs++; + return; + } + + if (ar != expected_ar) { + printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, ar, expected_ar); + nerrs++; + } else if (limit != expected_limit) { + printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, limit, expected_limit); + nerrs++; + } else if (verbose) { + printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n", + (ldt ? "LDT" : "GDT"), index, ar, limit); + } +} + +static bool install_valid_mode(const struct user_desc *desc, uint32_t ar, + bool oldmode) +{ + int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, + desc, sizeof(*desc)); + if (ret < -1) + errno = -ret; + if (ret == 0) { + uint32_t limit = desc->limit; + if (desc->limit_in_pages) + limit = (limit << 12) + 4095; + check_valid_segment(desc->entry_number, 1, ar, limit, true); + return true; + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + return false; + } else { + if (desc->seg_32bit) { + printf("[FAIL]\tUnexpected modify_ldt failure %d\n", + errno); + nerrs++; + return false; + } else { + printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); + return false; + } + } +} + +static bool install_valid(const struct user_desc *desc, uint32_t ar) +{ + return install_valid_mode(desc, ar, false); +} + +static void install_invalid(const struct user_desc *desc, bool oldmode) +{ + int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11, + desc, sizeof(*desc)); + if (ret < -1) + errno = -ret; + if (ret == 0) { + check_invalid_segment(desc->entry_number, 1); + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + } else { + if (desc->seg_32bit) { + printf("[FAIL]\tUnexpected modify_ldt failure %d\n", + errno); + nerrs++; + } else { + printf("[OK]\tmodify_ldt rejected 16 bit segment\n"); + } + } +} + +static int safe_modify_ldt(int func, struct user_desc *ptr, + unsigned long bytecount) +{ + int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount); + if (ret < -1) + errno = -ret; + return ret; +} + +static void fail_install(struct user_desc *desc) +{ + if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) { + printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n"); + nerrs++; + } else if (errno == ENOSYS) { + printf("[OK]\tmodify_ldt returned -ENOSYS\n"); + } else { + printf("[OK]\tmodify_ldt failure %d\n", errno); + } +} + +static void do_simple_tests(void) +{ + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 10, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB); + + desc.limit_in_pages = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + check_invalid_segment(1, 1); + + desc.entry_number = 2; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + check_invalid_segment(1, 1); + + desc.base_addr = 0xf0000000; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G); + + desc.useable = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_P | AR_DB | AR_G | AR_AVL); + + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.seg_32bit = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_G | AR_AVL); + + desc.seg_32bit = 1; + desc.contents = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.contents = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | + AR_S | AR_DB | AR_G | AR_AVL); + + desc.read_exec_only = 0; + desc.limit_in_pages = 0; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | + AR_S | AR_DB | AR_AVL); + + desc.contents = 3; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 0; + desc.contents = 2; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | + AR_S | AR_DB | AR_AVL); + + desc.read_exec_only = 1; + +#ifdef __x86_64__ + desc.lm = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL); + desc.lm = 0; +#endif + + bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL); + + if (entry1_okay) { + printf("[RUN]\tTest fork\n"); + pid_t child = fork(); + if (child == 0) { + nerrs = 0; + check_valid_segment(desc.entry_number, 1, + AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL, desc.limit, + true); + check_invalid_segment(1, 1); + exit(nerrs ? 1 : 0); + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + printf("[RUN]\tTest size\n"); + int i; + for (i = 0; i < 8192; i++) { + desc.entry_number = i; + desc.limit = i; + if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) { + printf("[FAIL]\tFailed to install entry %d\n", i); + nerrs++; + break; + } + } + for (int j = 0; j < i; j++) { + check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE | + AR_S | AR_DB | AR_AVL, j, false); + } + printf("[DONE]\tSize test\n"); + } else { + printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n"); + } + + /* Test entry_number too high. */ + desc.entry_number = 8192; + fail_install(&desc); + + /* Test deletion and actions mistakeable for deletion. */ + memset(&desc, 0, sizeof(desc)); + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P); + + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S); + + desc.seg_not_present = 0; + desc.read_exec_only = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P); + + desc.read_exec_only = 0; + desc.seg_not_present = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S); + + desc.read_exec_only = 1; + desc.limit = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S); + + desc.limit = 0; + desc.base_addr = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S); + + desc.base_addr = 0; + install_invalid(&desc, false); + + desc.seg_not_present = 0; + desc.read_exec_only = 0; + desc.seg_32bit = 1; + install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB); + install_invalid(&desc, true); +} + +/* + * 0: thread is idle + * 1: thread armed + * 2: thread should clear LDT entry 0 + * 3: thread should exit + */ +static volatile unsigned int ftx; + +static void *threadproc(void *ctx) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 1"); /* should never fail */ + + while (1) { + syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0); + while (ftx != 2) { + if (ftx >= 3) + return NULL; + } + + /* clear LDT entry 0 */ + const struct user_desc desc = {}; + if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0) + err(1, "modify_ldt"); + + /* If ftx == 2, set it to zero. If ftx == 100, quit. */ + unsigned int x = -2; + asm volatile ("lock xaddl %[x], %[ftx]" : + [x] "+r" (x), [ftx] "+m" (ftx)); + if (x != 2) + return NULL; + } +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static void do_multicpu_tests(void) +{ + cpu_set_t cpuset; + pthread_t thread; + int failures = 0, iters = 5, i; + unsigned short orig_ss; + + CPU_ZERO(&cpuset); + CPU_SET(1, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tCannot set affinity to CPU 1\n"); + return; + } + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { + printf("[SKIP]\tCannot set affinity to CPU 0\n"); + return; + } + + sethandler(SIGSEGV, sigsegv, 0); +#ifdef __i386__ + /* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */ + sethandler(SIGILL, sigsegv, 0); +#endif + + printf("[RUN]\tCross-CPU LDT invalidation\n"); + + if (pthread_create(&thread, 0, threadproc, 0) != 0) + err(1, "pthread_create"); + + asm volatile ("mov %%ss, %0" : "=rm" (orig_ss)); + + for (i = 0; i < 5; i++) { + if (sigsetjmp(jmpbuf, 1) != 0) + continue; + + /* Make sure the thread is ready after the last test. */ + while (ftx != 0) + ; + + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 0xfffff, + .seg_32bit = 1, + .contents = 0, /* Data */ + .read_exec_only = 0, + .limit_in_pages = 1, + .seg_not_present = 0, + .useable = 0 + }; + + if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) { + if (errno != ENOSYS) + err(1, "modify_ldt"); + printf("[SKIP]\tmodify_ldt unavailable\n"); + break; + } + + /* Arm the thread. */ + ftx = 1; + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + + asm volatile ("mov %0, %%ss" : : "r" (0x7)); + + /* Go! */ + ftx = 2; + + while (ftx != 0) + ; + + /* + * On success, modify_ldt will segfault us synchronously, + * and we'll escape via siglongjmp. + */ + + failures++; + asm volatile ("mov %0, %%ss" : : "rm" (orig_ss)); + }; + + ftx = 100; /* Kill the thread. */ + syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); + + if (pthread_join(thread, NULL) != 0) + err(1, "pthread_join"); + + if (failures) { + printf("[FAIL]\t%d of %d iterations failed\n", failures, iters); + nerrs++; + } else { + printf("[OK]\tAll %d iterations succeeded\n", iters); + } +} + +static int finish_exec_test(void) +{ + /* + * In a sensible world, this would be check_invalid_segment(0, 1); + * For better or for worse, though, the LDT is inherited across exec. + * We can probably change this safely, but for now we test it. + */ + check_valid_segment(0, 1, + AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB, + 42, true); + + return nerrs ? 1 : 0; +} + +static void do_exec_test(void) +{ + printf("[RUN]\tTest exec\n"); + + struct user_desc desc = { + .entry_number = 0, + .base_addr = 0, + .limit = 42, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB); + + pid_t child = fork(); + if (child == 0) { + execl("/proc/self/exe", "ldt_gdt_test_exec", NULL); + printf("[FAIL]\tCould not exec self\n"); + exit(1); /* exec failed */ + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } +} + +int main(int argc, char **argv) +{ + if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec")) + return finish_exec_test(); + + do_simple_tests(); + + do_multicpu_tests(); + + do_exec_test(); + + return nerrs ? 1 : 0; +} diff --git a/kernel/tools/testing/selftests/x86/ptrace_syscall.c b/kernel/tools/testing/selftests/x86/ptrace_syscall.c new file mode 100644 index 000000000..5105b49cd --- /dev/null +++ b/kernel/tools/testing/selftests/x86/ptrace_syscall.c @@ -0,0 +1,294 @@ +#define _GNU_SOURCE + +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/syscall.h> +#include <sys/user.h> +#include <unistd.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <err.h> +#include <string.h> +#include <asm/ptrace-abi.h> +#include <sys/auxv.h> + +/* Bitness-agnostic defines for user_regs_struct fields. */ +#ifdef __x86_64__ +# define user_syscall_nr orig_rax +# define user_arg0 rdi +# define user_arg1 rsi +# define user_arg2 rdx +# define user_arg3 r10 +# define user_arg4 r8 +# define user_arg5 r9 +# define user_ip rip +# define user_ax rax +#else +# define user_syscall_nr orig_eax +# define user_arg0 ebx +# define user_arg1 ecx +# define user_arg2 edx +# define user_arg3 esi +# define user_arg4 edi +# define user_arg5 ebp +# define user_ip eip +# define user_ax eax +#endif + +static int nerrs = 0; + +struct syscall_args32 { + uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5; +}; + +#ifdef __i386__ +extern void sys32_helper(struct syscall_args32 *, void *); +extern void int80_and_ret(void); +#endif + +/* + * Helper to invoke int80 with controlled regs and capture the final regs. + */ +static void do_full_int80(struct syscall_args32 *args) +{ +#ifdef __x86_64__ + register unsigned long bp asm("bp") = args->arg5; + asm volatile ("int $0x80" + : "+a" (args->nr), + "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), + "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)); + args->arg5 = bp; +#else + sys32_helper(args, int80_and_ret); +#endif +} + +#ifdef __i386__ +static void (*vsyscall32)(void); + +/* + * Nasty helper to invoke AT_SYSINFO (i.e. __kernel_vsyscall) with + * controlled regs and capture the final regs. This is so nasty that it + * crashes my copy of gdb :) + */ +static void do_full_vsyscall32(struct syscall_args32 *args) +{ + sys32_helper(args, vsyscall32); +} +#endif + +static siginfo_t wait_trap(pid_t chld) +{ + siginfo_t si; + if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0) + err(1, "waitid"); + if (si.si_pid != chld) + errx(1, "got unexpected pid in event\n"); + if (si.si_code != CLD_TRAPPED) + errx(1, "got unexpected event type %d\n", si.si_code); + return si; +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +#ifdef __x86_64__ +# define REG_BP REG_RBP +#else +# define REG_BP REG_EBP +#endif + +static void empty_handler(int sig, siginfo_t *si, void *ctx_void) +{ +} + +static void test_sys32_regs(void (*do_syscall)(struct syscall_args32 *)) +{ + struct syscall_args32 args = { + .nr = 224, /* gettid */ + .arg0 = 10, .arg1 = 11, .arg2 = 12, + .arg3 = 13, .arg4 = 14, .arg5 = 15, + }; + + do_syscall(&args); + + if (args.nr != getpid() || + args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 || + args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) { + printf("[FAIL]\tgetpid() failed to preseve regs\n"); + nerrs++; + } else { + printf("[OK]\tgetpid() preserves regs\n"); + } + + sethandler(SIGUSR1, empty_handler, 0); + + args.nr = 37; /* kill */ + args.arg0 = getpid(); + args.arg1 = SIGUSR1; + do_syscall(&args); + if (args.nr != 0 || + args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 || + args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) { + printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preseve regs\n"); + nerrs++; + } else { + printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n"); + } + clearhandler(SIGUSR1); +} + +static void test_ptrace_syscall_restart(void) +{ + printf("[RUN]\tptrace-induced syscall restart\n"); + pid_t chld = fork(); + if (chld < 0) + err(1, "fork"); + + if (chld == 0) { + if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) + err(1, "PTRACE_TRACEME"); + + printf("\tChild will make one syscall\n"); + raise(SIGSTOP); + + syscall(SYS_gettid, 10, 11, 12, 13, 14, 15); + _exit(0); + } + + int status; + + /* Wait for SIGSTOP. */ + if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status)) + err(1, "waitpid"); + + struct user_regs_struct regs; + + printf("[RUN]\tSYSEMU\n"); + if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) + err(1, "PTRACE_SYSCALL"); + wait_trap(chld); + + if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) + err(1, "PTRACE_GETREGS"); + + if (regs.user_syscall_nr != SYS_gettid || + regs.user_arg0 != 10 || regs.user_arg1 != 11 || + regs.user_arg2 != 12 || regs.user_arg3 != 13 || + regs.user_arg4 != 14 || regs.user_arg5 != 15) { + printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5); + nerrs++; + } else { + printf("[OK]\tInitial nr and args are correct\n"); + } + + printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n", + (unsigned long)regs.user_ip); + + /* + * This does exactly what it appears to do if syscall is int80 or + * SYSCALL64. For SYSCALL32 or SYSENTER, though, this is highly + * magical. It needs to work so that ptrace and syscall restart + * work as expected. + */ + regs.user_ax = regs.user_syscall_nr; + regs.user_ip -= 2; + if (ptrace(PTRACE_SETREGS, chld, 0, ®s) != 0) + err(1, "PTRACE_SETREGS"); + + if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) + err(1, "PTRACE_SYSCALL"); + wait_trap(chld); + + if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) + err(1, "PTRACE_GETREGS"); + + if (regs.user_syscall_nr != SYS_gettid || + regs.user_arg0 != 10 || regs.user_arg1 != 11 || + regs.user_arg2 != 12 || regs.user_arg3 != 13 || + regs.user_arg4 != 14 || regs.user_arg5 != 15) { + printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5); + nerrs++; + } else { + printf("[OK]\tRestarted nr and args are correct\n"); + } + + printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n", + (unsigned long)regs.user_ip); + + regs.user_ax = SYS_getpid; + regs.user_arg0 = 20; + regs.user_arg1 = 21; + regs.user_arg2 = 22; + regs.user_arg3 = 23; + regs.user_arg4 = 24; + regs.user_arg5 = 25; + regs.user_ip -= 2; + + if (ptrace(PTRACE_SETREGS, chld, 0, ®s) != 0) + err(1, "PTRACE_SETREGS"); + + if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) + err(1, "PTRACE_SYSCALL"); + wait_trap(chld); + + if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) + err(1, "PTRACE_GETREGS"); + + if (regs.user_syscall_nr != SYS_getpid || + regs.user_arg0 != 20 || regs.user_arg1 != 21 || regs.user_arg2 != 22 || + regs.user_arg3 != 23 || regs.user_arg4 != 24 || regs.user_arg5 != 25) { + printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5); + nerrs++; + } else { + printf("[OK]\tReplacement nr and args are correct\n"); + } + + if (ptrace(PTRACE_CONT, chld, 0, 0) != 0) + err(1, "PTRACE_CONT"); + if (waitpid(chld, &status, 0) != chld) + err(1, "waitpid"); + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild exited cleanly\n"); + } +} + +int main() +{ + printf("[RUN]\tCheck int80 return regs\n"); + test_sys32_regs(do_full_int80); + +#if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16) + vsyscall32 = (void *)getauxval(AT_SYSINFO); + printf("[RUN]\tCheck AT_SYSINFO return regs\n"); + test_sys32_regs(do_full_vsyscall32); +#endif + + test_ptrace_syscall_restart(); + + return 0; +} diff --git a/kernel/tools/testing/selftests/x86/raw_syscall_helper_32.S b/kernel/tools/testing/selftests/x86/raw_syscall_helper_32.S new file mode 100644 index 000000000..534e71e35 --- /dev/null +++ b/kernel/tools/testing/selftests/x86/raw_syscall_helper_32.S @@ -0,0 +1,46 @@ +.global sys32_helper +sys32_helper: + /* Args: syscall_args_32*, function pointer */ + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 5*4(%esp), %eax /* pointer to args struct */ + + movl 1*4(%eax), %ebx + movl 2*4(%eax), %ecx + movl 3*4(%eax), %edx + movl 4*4(%eax), %esi + movl 5*4(%eax), %edi + movl 6*4(%eax), %ebp + movl 0*4(%eax), %eax + + call *(6*4)(%esp) /* Do the syscall */ + + /* Now we need to recover without losing any reg values */ + pushl %eax + movl 6*4(%esp), %eax + popl 0*4(%eax) + movl %ebx, 1*4(%eax) + movl %ecx, 2*4(%eax) + movl %edx, 3*4(%eax) + movl %esi, 4*4(%eax) + movl %edi, 5*4(%eax) + movl %ebp, 6*4(%eax) + + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + + .type sys32_helper, @function + .size sys32_helper, .-sys32_helper + +.global int80_and_ret +int80_and_ret: + int $0x80 + ret + + .type int80_and_ret, @function + .size int80_and_ret, .-int80_and_ret diff --git a/kernel/tools/testing/selftests/x86/syscall_arg_fault.c b/kernel/tools/testing/selftests/x86/syscall_arg_fault.c new file mode 100644 index 000000000..7db4fc9fa --- /dev/null +++ b/kernel/tools/testing/selftests/x86/syscall_arg_fault.c @@ -0,0 +1,130 @@ +/* + * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/signal.h> +#include <sys/ucontext.h> +#include <err.h> +#include <setjmp.h> +#include <errno.h> + +/* Our sigaltstack scratch space. */ +static unsigned char altstack_data[SIGSTKSZ]; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static volatile sig_atomic_t sig_traps; +static sigjmp_buf jmpbuf; + +static volatile sig_atomic_t n_errs; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) { + printf("[FAIL]\tAX had the wrong value: 0x%x\n", + ctx->uc_mcontext.gregs[REG_EAX]); + n_errs++; + } else { + printf("[OK]\tSeems okay\n"); + } + + siglongjmp(jmpbuf, 1); +} + +static void sigill(int sig, siginfo_t *info, void *ctx_void) +{ + printf("[SKIP]\tIllegal instruction\n"); + siglongjmp(jmpbuf, 1); +} + +int main() +{ + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + + sethandler(SIGSEGV, sigsegv, SA_ONSTACK); + sethandler(SIGILL, sigill, SA_ONSTACK); + + /* + * Exercise another nasty special case. The 32-bit SYSCALL + * and SYSENTER instructions (even in compat mode) each + * clobber one register. A Linux system call has a syscall + * number and six arguments, and the user stack pointer + * needs to live in some register on return. That means + * that we need eight registers, but SYSCALL and SYSENTER + * only preserve seven registers. As a result, one argument + * ends up on the stack. The stack is user memory, which + * means that the kernel can fail to read it. + * + * The 32-bit fast system calls don't have a defined ABI: + * we're supposed to invoke them through the vDSO. So we'll + * fudge it: we set all regs to invalid pointer values and + * invoke the entry instruction. The return will fail no + * matter what, and we completely lose our program state, + * but we can fix it up with a signal handler. + */ + + printf("[RUN]\tSYSENTER with invalid state\n"); + if (sigsetjmp(jmpbuf, 1) == 0) { + asm volatile ( + "movl $-1, %%eax\n\t" + "movl $-1, %%ebx\n\t" + "movl $-1, %%ecx\n\t" + "movl $-1, %%edx\n\t" + "movl $-1, %%esi\n\t" + "movl $-1, %%edi\n\t" + "movl $-1, %%ebp\n\t" + "movl $-1, %%esp\n\t" + "sysenter" + : : : "memory", "flags"); + } + + printf("[RUN]\tSYSCALL with invalid state\n"); + if (sigsetjmp(jmpbuf, 1) == 0) { + asm volatile ( + "movl $-1, %%eax\n\t" + "movl $-1, %%ebx\n\t" + "movl $-1, %%ecx\n\t" + "movl $-1, %%edx\n\t" + "movl $-1, %%esi\n\t" + "movl $-1, %%edi\n\t" + "movl $-1, %%ebp\n\t" + "movl $-1, %%esp\n\t" + "syscall\n\t" + "pushl $0" /* make sure we segfault cleanly */ + : : : "memory", "flags"); + } + + return 0; +} diff --git a/kernel/tools/testing/selftests/x86/syscall_nt.c b/kernel/tools/testing/selftests/x86/syscall_nt.c new file mode 100644 index 000000000..60c06af46 --- /dev/null +++ b/kernel/tools/testing/selftests/x86/syscall_nt.c @@ -0,0 +1,54 @@ +/* + * syscall_nt.c - checks syscalls with NT set + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Some obscure user-space code requires the ability to make system calls + * with FLAGS.NT set. Make sure it works. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <asm/processor-flags.h> + +#ifdef __x86_64__ +# define WIDTH "q" +#else +# define WIDTH "l" +#endif + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH + : : "rm" (eflags) : "flags"); +} + +int main() +{ + printf("[RUN]\tSet NT and issue a syscall\n"); + set_eflags(get_eflags() | X86_EFLAGS_NT); + syscall(SYS_getpid); + if (get_eflags() & X86_EFLAGS_NT) { + printf("[OK]\tThe syscall worked and NT is still set\n"); + return 0; + } else { + printf("[FAIL]\tThe syscall worked but NT was cleared\n"); + return 1; + } +} diff --git a/kernel/tools/testing/selftests/x86/sysret_ss_attrs.c b/kernel/tools/testing/selftests/x86/sysret_ss_attrs.c new file mode 100644 index 000000000..ce42d5a64 --- /dev/null +++ b/kernel/tools/testing/selftests/x86/sysret_ss_attrs.c @@ -0,0 +1,112 @@ +/* + * sysret_ss_attrs.c - test that syscalls return valid hidden SS attributes + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * On AMD CPUs, SYSRET can return with a valid SS descriptor with with + * the hidden attributes set to an unusable state. Make sure the kernel + * doesn't let this happen. + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <err.h> +#include <stddef.h> +#include <stdbool.h> +#include <pthread.h> + +static void *threadproc(void *ctx) +{ + /* + * Do our best to cause sleeps on this CPU to exit the kernel and + * re-enter with SS = 0. + */ + while (true) + ; + + return NULL; +} + +#ifdef __x86_64__ +extern unsigned long call32_from_64(void *stack, void (*function)(void)); + +asm (".pushsection .text\n\t" + ".code32\n\t" + "test_ss:\n\t" + "pushl $0\n\t" + "popl %eax\n\t" + "ret\n\t" + ".code64"); +extern void test_ss(void); +#endif + +int main() +{ + /* + * Start a busy-looping thread on the same CPU we're on. + * For simplicity, just stick everything to CPU 0. This will + * fail in some containers, but that's probably okay. + */ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + printf("[WARN]\tsched_setaffinity failed\n"); + + pthread_t thread; + if (pthread_create(&thread, 0, threadproc, 0) != 0) + err(1, "pthread_create"); + +#ifdef __x86_64__ + unsigned char *stack32 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0); + if (stack32 == MAP_FAILED) + err(1, "mmap"); +#endif + + printf("[RUN]\tSyscalls followed by SS validation\n"); + + for (int i = 0; i < 1000; i++) { + /* + * Go to sleep and return using sysret (if we're 64-bit + * or we're 32-bit on AMD on a 64-bit kernel). On AMD CPUs, + * SYSRET doesn't fix up the cached SS descriptor, so the + * kernel needs some kind of workaround to make sure that we + * end the system call with a valid stack segment. This + * can be a confusing failure because the SS *selector* + * is the same regardless. + */ + usleep(2); + +#ifdef __x86_64__ + /* + * On 32-bit, just doing a syscall through glibc is enough + * to cause a crash if our cached SS descriptor is invalid. + * On 64-bit, it's not, so try extra hard. + */ + call32_from_64(stack32 + 4088, test_ss); +#endif + } + + printf("[OK]\tWe survived\n"); + +#ifdef __x86_64__ + munmap(stack32, 4096); +#endif + + return 0; +} diff --git a/kernel/tools/testing/selftests/x86/test_FCMOV.c b/kernel/tools/testing/selftests/x86/test_FCMOV.c new file mode 100644 index 000000000..4adcca0c8 --- /dev/null +++ b/kernel/tools/testing/selftests/x86/test_FCMOV.c @@ -0,0 +1,93 @@ +#undef _GNU_SOURCE +#define _GNU_SOURCE 1 +#undef __USE_GNU +#define __USE_GNU 1 +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/select.h> +#include <sys/time.h> +#include <sys/wait.h> + +#define TEST(insn) \ +long double __attribute__((noinline)) insn(long flags) \ +{ \ + long double out; \ + asm ("\n" \ + " push %1""\n" \ + " popf""\n" \ + " fldpi""\n" \ + " fld1""\n" \ + " " #insn " %%st(1), %%st" "\n" \ + " ffree %%st(1)" "\n" \ + : "=t" (out) \ + : "r" (flags) \ + ); \ + return out; \ +} + +TEST(fcmovb) +TEST(fcmove) +TEST(fcmovbe) +TEST(fcmovu) +TEST(fcmovnb) +TEST(fcmovne) +TEST(fcmovnbe) +TEST(fcmovnu) + +enum { + CF = 1 << 0, + PF = 1 << 2, + ZF = 1 << 6, +}; + +void sighandler(int sig) +{ + printf("[FAIL]\tGot signal %d, exiting\n", sig); + exit(1); +} + +int main(int argc, char **argv, char **envp) +{ + int err = 0; + + /* SIGILL triggers on 32-bit kernels w/o fcomi emulation + * when run with "no387 nofxsr". Other signals are caught + * just in case. + */ + signal(SIGILL, sighandler); + signal(SIGFPE, sighandler); + signal(SIGSEGV, sighandler); + + printf("[RUN]\tTesting fcmovCC instructions\n"); + /* If fcmovCC() returns 1.0, the move wasn't done */ + err |= !(fcmovb(0) == 1.0); err |= !(fcmovnb(0) != 1.0); + err |= !(fcmove(0) == 1.0); err |= !(fcmovne(0) != 1.0); + err |= !(fcmovbe(0) == 1.0); err |= !(fcmovnbe(0) != 1.0); + err |= !(fcmovu(0) == 1.0); err |= !(fcmovnu(0) != 1.0); + + err |= !(fcmovb(CF) != 1.0); err |= !(fcmovnb(CF) == 1.0); + err |= !(fcmove(CF) == 1.0); err |= !(fcmovne(CF) != 1.0); + err |= !(fcmovbe(CF) != 1.0); err |= !(fcmovnbe(CF) == 1.0); + err |= !(fcmovu(CF) == 1.0); err |= !(fcmovnu(CF) != 1.0); + + err |= !(fcmovb(ZF) == 1.0); err |= !(fcmovnb(ZF) != 1.0); + err |= !(fcmove(ZF) != 1.0); err |= !(fcmovne(ZF) == 1.0); + err |= !(fcmovbe(ZF) != 1.0); err |= !(fcmovnbe(ZF) == 1.0); + err |= !(fcmovu(ZF) == 1.0); err |= !(fcmovnu(ZF) != 1.0); + + err |= !(fcmovb(PF) == 1.0); err |= !(fcmovnb(PF) != 1.0); + err |= !(fcmove(PF) == 1.0); err |= !(fcmovne(PF) != 1.0); + err |= !(fcmovbe(PF) == 1.0); err |= !(fcmovnbe(PF) != 1.0); + err |= !(fcmovu(PF) != 1.0); err |= !(fcmovnu(PF) == 1.0); + + if (!err) + printf("[OK]\tfcmovCC\n"); + else + printf("[FAIL]\tfcmovCC errors: %d\n", err); + + return err; +} diff --git a/kernel/tools/testing/selftests/x86/test_FCOMI.c b/kernel/tools/testing/selftests/x86/test_FCOMI.c new file mode 100644 index 000000000..db4933e31 --- /dev/null +++ b/kernel/tools/testing/selftests/x86/test_FCOMI.c @@ -0,0 +1,331 @@ +#undef _GNU_SOURCE +#define _GNU_SOURCE 1 +#undef __USE_GNU +#define __USE_GNU 1 +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/select.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <fenv.h> + +enum { + CF = 1 << 0, + PF = 1 << 2, + ZF = 1 << 6, + ARITH = CF | PF | ZF, +}; + +long res_fcomi_pi_1; +long res_fcomi_1_pi; +long res_fcomi_1_1; +long res_fcomi_nan_1; +/* sNaN is s|111 1111 1|1xx xxxx xxxx xxxx xxxx xxxx */ +/* qNaN is s|111 1111 1|0xx xxxx xxxx xxxx xxxx xxxx (some x must be nonzero) */ +int snan = 0x7fc11111; +int qnan = 0x7f811111; +unsigned short snan1[5]; +/* sNaN80 is s|111 1111 1111 1111 |10xx xx...xx (some x must be nonzero) */ +unsigned short snan80[5] = { 0x1111, 0x1111, 0x1111, 0x8111, 0x7fff }; + +int test(long flags) +{ + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + + asm ("\n" + + " push %0""\n" + " popf""\n" + " fld1""\n" + " fldpi""\n" + " fcomi %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " ffree %%st(1)" "\n" + " pushf""\n" + " pop res_fcomi_1_pi""\n" + + " push %0""\n" + " popf""\n" + " fldpi""\n" + " fld1""\n" + " fcomi %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " ffree %%st(1)" "\n" + " pushf""\n" + " pop res_fcomi_pi_1""\n" + + " push %0""\n" + " popf""\n" + " fld1""\n" + " fld1""\n" + " fcomi %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " ffree %%st(1)" "\n" + " pushf""\n" + " pop res_fcomi_1_1""\n" + : + : "r" (flags) + ); + if ((res_fcomi_1_pi & ARITH) != (0)) { + printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags); + return 1; + } + if ((res_fcomi_pi_1 & ARITH) != (CF)) { + printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH); + return 1; + } + if ((res_fcomi_1_1 & ARITH) != (ZF)) { + printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags); + return 1; + } + if (fetestexcept(FE_INVALID) != 0) { + printf("[BAD]\tFE_INVALID is set in %s\n", __func__); + return 1; + } + return 0; +} + +int test_qnan(long flags) +{ + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + + asm ("\n" + " push %0""\n" + " popf""\n" + " flds qnan""\n" + " fld1""\n" + " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it + " fcomi %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " ffree %%st(1)" "\n" + " pushf""\n" + " pop res_fcomi_nan_1""\n" + : + : "r" (flags) + ); + if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) { + printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags); + return 1; + } + if (fetestexcept(FE_INVALID) != FE_INVALID) { + printf("[BAD]\tFE_INVALID is not set in %s\n", __func__); + return 1; + } + return 0; +} + +int testu_qnan(long flags) +{ + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + + asm ("\n" + " push %0""\n" + " popf""\n" + " flds qnan""\n" + " fld1""\n" + " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it + " fucomi %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " ffree %%st(1)" "\n" + " pushf""\n" + " pop res_fcomi_nan_1""\n" + : + : "r" (flags) + ); + if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) { + printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags); + return 1; + } + if (fetestexcept(FE_INVALID) != 0) { + printf("[BAD]\tFE_INVALID is set in %s\n", __func__); + return 1; + } + return 0; +} + +int testu_snan(long flags) +{ + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + + asm ("\n" + " push %0""\n" + " popf""\n" +// " flds snan""\n" // WRONG, this will convert 32-bit fp snan to a *qnan* in 80-bit fp register! +// " fstpt snan1""\n" // if uncommented, it prints "snan1:7fff c111 1100 0000 0000" - c111, not 8111! +// " fnclex""\n" // flds of a snan raised FE_INVALID, clear it + " fldt snan80""\n" // fldt never raise FE_INVALID + " fld1""\n" + " fucomi %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " ffree %%st(1)" "\n" + " pushf""\n" + " pop res_fcomi_nan_1""\n" + : + : "r" (flags) + ); + if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) { + printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags); + return 1; + } +// printf("snan:%x snan1:%04x %04x %04x %04x %04x\n", snan, snan1[4], snan1[3], snan1[2], snan1[1], snan1[0]); + if (fetestexcept(FE_INVALID) != FE_INVALID) { + printf("[BAD]\tFE_INVALID is not set in %s\n", __func__); + return 1; + } + return 0; +} + +int testp(long flags) +{ + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + + asm ("\n" + + " push %0""\n" + " popf""\n" + " fld1""\n" + " fldpi""\n" + " fcomip %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " pushf""\n" + " pop res_fcomi_1_pi""\n" + + " push %0""\n" + " popf""\n" + " fldpi""\n" + " fld1""\n" + " fcomip %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " pushf""\n" + " pop res_fcomi_pi_1""\n" + + " push %0""\n" + " popf""\n" + " fld1""\n" + " fld1""\n" + " fcomip %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " pushf""\n" + " pop res_fcomi_1_1""\n" + : + : "r" (flags) + ); + if ((res_fcomi_1_pi & ARITH) != (0)) { + printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags); + return 1; + } + if ((res_fcomi_pi_1 & ARITH) != (CF)) { + printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH); + return 1; + } + if ((res_fcomi_1_1 & ARITH) != (ZF)) { + printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags); + return 1; + } + if (fetestexcept(FE_INVALID) != 0) { + printf("[BAD]\tFE_INVALID is set in %s\n", __func__); + return 1; + } + return 0; +} + +int testp_qnan(long flags) +{ + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + + asm ("\n" + " push %0""\n" + " popf""\n" + " flds qnan""\n" + " fld1""\n" + " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it + " fcomip %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " pushf""\n" + " pop res_fcomi_nan_1""\n" + : + : "r" (flags) + ); + if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) { + printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags); + return 1; + } + if (fetestexcept(FE_INVALID) != FE_INVALID) { + printf("[BAD]\tFE_INVALID is not set in %s\n", __func__); + return 1; + } + return 0; +} + +int testup_qnan(long flags) +{ + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + + asm ("\n" + " push %0""\n" + " popf""\n" + " flds qnan""\n" + " fld1""\n" + " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it + " fucomip %%st(1), %%st" "\n" + " ffree %%st(0)" "\n" + " pushf""\n" + " pop res_fcomi_nan_1""\n" + : + : "r" (flags) + ); + if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) { + printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags); + return 1; + } + if (fetestexcept(FE_INVALID) != 0) { + printf("[BAD]\tFE_INVALID is set in %s\n", __func__); + return 1; + } + return 0; +} + +void sighandler(int sig) +{ + printf("[FAIL]\tGot signal %d, exiting\n", sig); + exit(1); +} + +int main(int argc, char **argv, char **envp) +{ + int err = 0; + + /* SIGILL triggers on 32-bit kernels w/o fcomi emulation + * when run with "no387 nofxsr". Other signals are caught + * just in case. + */ + signal(SIGILL, sighandler); + signal(SIGFPE, sighandler); + signal(SIGSEGV, sighandler); + + printf("[RUN]\tTesting f[u]comi[p] instructions\n"); + err |= test(0); + err |= test_qnan(0); + err |= testu_qnan(0); + err |= testu_snan(0); + err |= test(CF|ZF|PF); + err |= test_qnan(CF|ZF|PF); + err |= testu_qnan(CF|ZF|PF); + err |= testu_snan(CF|ZF|PF); + err |= testp(0); + err |= testp_qnan(0); + err |= testup_qnan(0); + err |= testp(CF|ZF|PF); + err |= testp_qnan(CF|ZF|PF); + err |= testup_qnan(CF|ZF|PF); + if (!err) + printf("[OK]\tf[u]comi[p]\n"); + else + printf("[FAIL]\tf[u]comi[p] errors: %d\n", err); + + return err; +} diff --git a/kernel/tools/testing/selftests/x86/test_FISTTP.c b/kernel/tools/testing/selftests/x86/test_FISTTP.c new file mode 100644 index 000000000..b8e61a047 --- /dev/null +++ b/kernel/tools/testing/selftests/x86/test_FISTTP.c @@ -0,0 +1,137 @@ +#undef _GNU_SOURCE +#define _GNU_SOURCE 1 +#undef __USE_GNU +#define __USE_GNU 1 +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/select.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <fenv.h> + +unsigned long long res64 = -1; +unsigned int res32 = -1; +unsigned short res16 = -1; + +int test(void) +{ + int ex; + + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + asm volatile ("\n" + " fld1""\n" + " fisttp res16""\n" + " fld1""\n" + " fisttpl res32""\n" + " fld1""\n" + " fisttpll res64""\n" + : : : "memory" + ); + if (res16 != 1 || res32 != 1 || res64 != 1) { + printf("[BAD]\tfisttp 1\n"); + return 1; + } + ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + if (ex != 0) { + printf("[BAD]\tfisttp 1: wrong exception state\n"); + return 1; + } + + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + asm volatile ("\n" + " fldpi""\n" + " fisttp res16""\n" + " fldpi""\n" + " fisttpl res32""\n" + " fldpi""\n" + " fisttpll res64""\n" + : : : "memory" + ); + if (res16 != 3 || res32 != 3 || res64 != 3) { + printf("[BAD]\tfisttp pi\n"); + return 1; + } + ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + if (ex != FE_INEXACT) { + printf("[BAD]\tfisttp pi: wrong exception state\n"); + return 1; + } + + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + asm volatile ("\n" + " fldpi""\n" + " fchs""\n" + " fisttp res16""\n" + " fldpi""\n" + " fchs""\n" + " fisttpl res32""\n" + " fldpi""\n" + " fchs""\n" + " fisttpll res64""\n" + : : : "memory" + ); + if (res16 != 0xfffd || res32 != 0xfffffffd || res64 != 0xfffffffffffffffdULL) { + printf("[BAD]\tfisttp -pi\n"); + return 1; + } + ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + if (ex != FE_INEXACT) { + printf("[BAD]\tfisttp -pi: wrong exception state\n"); + return 1; + } + + feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + asm volatile ("\n" + " fldln2""\n" + " fisttp res16""\n" + " fldln2""\n" + " fisttpl res32""\n" + " fldln2""\n" + " fisttpll res64""\n" + : : : "memory" + ); + /* Test truncation to zero (round-to-nearest would give 1 here) */ + if (res16 != 0 || res32 != 0 || res64 != 0) { + printf("[BAD]\tfisttp ln2\n"); + return 1; + } + ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); + if (ex != FE_INEXACT) { + printf("[BAD]\tfisttp ln2: wrong exception state\n"); + return 1; + } + + return 0; +} + +void sighandler(int sig) +{ + printf("[FAIL]\tGot signal %d, exiting\n", sig); + exit(1); +} + +int main(int argc, char **argv, char **envp) +{ + int err = 0; + + /* SIGILL triggers on 32-bit kernels w/o fisttp emulation + * when run with "no387 nofxsr". Other signals are caught + * just in case. + */ + signal(SIGILL, sighandler); + signal(SIGFPE, sighandler); + signal(SIGSEGV, sighandler); + + printf("[RUN]\tTesting fisttp instructions\n"); + err |= test(); + if (!err) + printf("[OK]\tfisttp\n"); + else + printf("[FAIL]\tfisttp errors: %d\n", err); + + return err; +} diff --git a/kernel/tools/testing/selftests/x86/test_syscall_vdso.c b/kernel/tools/testing/selftests/x86/test_syscall_vdso.c new file mode 100644 index 000000000..40370354d --- /dev/null +++ b/kernel/tools/testing/selftests/x86/test_syscall_vdso.c @@ -0,0 +1,401 @@ +/* + * 32-bit syscall ABI conformance test. + * + * Copyright (c) 2015 Denys Vlasenko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +/* + * Can be built statically: + * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S + */ +#undef _GNU_SOURCE +#define _GNU_SOURCE 1 +#undef __USE_GNU +#define __USE_GNU 1 +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/select.h> +#include <sys/time.h> +#include <elf.h> +#include <sys/ptrace.h> +#include <sys/wait.h> + +#if !defined(__i386__) +int main(int argc, char **argv, char **envp) +{ + printf("[SKIP]\tNot a 32-bit x86 userspace\n"); + return 0; +} +#else + +long syscall_addr; +long get_syscall(char **envp) +{ + Elf32_auxv_t *auxv; + while (*envp++ != NULL) + continue; + for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++) + if (auxv->a_type == AT_SYSINFO) + return auxv->a_un.a_val; + printf("[WARN]\tAT_SYSINFO not supplied\n"); + return 0; +} + +asm ( + " .pushsection .text\n" + " .global int80\n" + "int80:\n" + " int $0x80\n" + " ret\n" + " .popsection\n" +); +extern char int80; + +struct regs64 { + uint64_t rax, rbx, rcx, rdx; + uint64_t rsi, rdi, rbp, rsp; + uint64_t r8, r9, r10, r11; + uint64_t r12, r13, r14, r15; +}; +struct regs64 regs64; +int kernel_is_64bit; + +asm ( + " .pushsection .text\n" + " .code64\n" + "get_regs64:\n" + " push %rax\n" + " mov $regs64, %eax\n" + " pop 0*8(%rax)\n" + " movq %rbx, 1*8(%rax)\n" + " movq %rcx, 2*8(%rax)\n" + " movq %rdx, 3*8(%rax)\n" + " movq %rsi, 4*8(%rax)\n" + " movq %rdi, 5*8(%rax)\n" + " movq %rbp, 6*8(%rax)\n" + " movq %rsp, 7*8(%rax)\n" + " movq %r8, 8*8(%rax)\n" + " movq %r9, 9*8(%rax)\n" + " movq %r10, 10*8(%rax)\n" + " movq %r11, 11*8(%rax)\n" + " movq %r12, 12*8(%rax)\n" + " movq %r13, 13*8(%rax)\n" + " movq %r14, 14*8(%rax)\n" + " movq %r15, 15*8(%rax)\n" + " ret\n" + "poison_regs64:\n" + " movq $0x7f7f7f7f, %r8\n" + " shl $32, %r8\n" + " orq $0x7f7f7f7f, %r8\n" + " movq %r8, %r9\n" + " movq %r8, %r10\n" + " movq %r8, %r11\n" + " movq %r8, %r12\n" + " movq %r8, %r13\n" + " movq %r8, %r14\n" + " movq %r8, %r15\n" + " ret\n" + " .code32\n" + " .popsection\n" +); +extern void get_regs64(void); +extern void poison_regs64(void); +extern unsigned long call64_from_32(void (*function)(void)); +void print_regs64(void) +{ + if (!kernel_is_64bit) + return; + printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx); + printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp); + printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 , regs64.r9 , regs64.r10, regs64.r11); + printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12, regs64.r13, regs64.r14, regs64.r15); +} + +int check_regs64(void) +{ + int err = 0; + int num = 8; + uint64_t *r64 = ®s64.r8; + + if (!kernel_is_64bit) + return 0; + + do { + if (*r64 == 0x7f7f7f7f7f7f7f7fULL) + continue; /* register did not change */ + if (syscall_addr != (long)&int80) { + /* + * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs: + * either clear them to 0, or for R11, load EFLAGS. + */ + if (*r64 == 0) + continue; + if (num == 11) { + printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64); + continue; + } + } else { + /* INT80 syscall entrypoint can be used by + * 64-bit programs too, unlike SYSCALL/SYSENTER. + * Therefore it must preserve R12+ + * (they are callee-saved registers in 64-bit C ABI). + * + * This was probably historically not intended, + * but R8..11 are clobbered (cleared to 0). + * IOW: they are the only registers which aren't + * preserved across INT80 syscall. + */ + if (*r64 == 0 && num <= 11) + continue; + } + printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64); + err++; + } while (r64++, ++num < 16); + + if (!err) + printf("[OK]\tR8..R15 did not leak kernel data\n"); + return err; +} + +int nfds; +fd_set rfds; +fd_set wfds; +fd_set efds; +struct timespec timeout; +sigset_t sigmask; +struct { + sigset_t *sp; + int sz; +} sigmask_desc; + +void prep_args() +{ + nfds = 42; + FD_ZERO(&rfds); + FD_ZERO(&wfds); + FD_ZERO(&efds); + FD_SET(0, &rfds); + FD_SET(1, &wfds); + FD_SET(2, &efds); + timeout.tv_sec = 0; + timeout.tv_nsec = 123; + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGINT); + sigaddset(&sigmask, SIGUSR2); + sigaddset(&sigmask, SIGRTMAX); + sigmask_desc.sp = &sigmask; + sigmask_desc.sz = 8; /* bytes */ +} + +static void print_flags(const char *name, unsigned long r) +{ + static const char *bitarray[] = { + "\n" ,"c\n" ,/* Carry Flag */ + "0 " ,"1 " ,/* Bit 1 - always on */ + "" ,"p " ,/* Parity Flag */ + "0 " ,"3? " , + "" ,"a " ,/* Auxiliary carry Flag */ + "0 " ,"5? " , + "" ,"z " ,/* Zero Flag */ + "" ,"s " ,/* Sign Flag */ + "" ,"t " ,/* Trap Flag */ + "" ,"i " ,/* Interrupt Flag */ + "" ,"d " ,/* Direction Flag */ + "" ,"o " ,/* Overflow Flag */ + "0 " ,"1 " ,/* I/O Privilege Level (2 bits) */ + "0" ,"1" ,/* I/O Privilege Level (2 bits) */ + "" ,"n " ,/* Nested Task */ + "0 " ,"15? ", + "" ,"r " ,/* Resume Flag */ + "" ,"v " ,/* Virtual Mode */ + "" ,"ac " ,/* Alignment Check/Access Control */ + "" ,"vif ",/* Virtual Interrupt Flag */ + "" ,"vip ",/* Virtual Interrupt Pending */ + "" ,"id " ,/* CPUID detection */ + NULL + }; + const char **bitstr; + int bit; + + printf("%s=%016lx ", name, r); + bitstr = bitarray + 42; + bit = 21; + if ((r >> 22) != 0) + printf("(extra bits are set) "); + do { + if (bitstr[(r >> bit) & 1][0]) + fputs(bitstr[(r >> bit) & 1], stdout); + bitstr -= 2; + bit--; + } while (bit >= 0); +} + +int run_syscall(void) +{ + long flags, bad_arg; + + prep_args(); + + if (kernel_is_64bit) + call64_from_32(poison_regs64); + /*print_regs64();*/ + + asm("\n" + /* Try 6-arg syscall: pselect. It should return quickly */ + " push %%ebp\n" + " mov $308, %%eax\n" /* PSELECT */ + " mov nfds, %%ebx\n" /* ebx arg1 */ + " mov $rfds, %%ecx\n" /* ecx arg2 */ + " mov $wfds, %%edx\n" /* edx arg3 */ + " mov $efds, %%esi\n" /* esi arg4 */ + " mov $timeout, %%edi\n" /* edi arg5 */ + " mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */ + " push $0x200ed7\n" /* set almost all flags */ + " popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */ + " call *syscall_addr\n" + /* Check that registers are not clobbered */ + " pushf\n" + " pop %%eax\n" + " cld\n" + " cmp nfds, %%ebx\n" /* ebx arg1 */ + " mov $1, %%ebx\n" + " jne 1f\n" + " cmp $rfds, %%ecx\n" /* ecx arg2 */ + " mov $2, %%ebx\n" + " jne 1f\n" + " cmp $wfds, %%edx\n" /* edx arg3 */ + " mov $3, %%ebx\n" + " jne 1f\n" + " cmp $efds, %%esi\n" /* esi arg4 */ + " mov $4, %%ebx\n" + " jne 1f\n" + " cmp $timeout, %%edi\n" /* edi arg5 */ + " mov $5, %%ebx\n" + " jne 1f\n" + " cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */ + " mov $6, %%ebx\n" + " jne 1f\n" + " mov $0, %%ebx\n" + "1:\n" + " pop %%ebp\n" + : "=a" (flags), "=b" (bad_arg) + : + : "cx", "dx", "si", "di" + ); + + if (kernel_is_64bit) { + memset(®s64, 0x77, sizeof(regs64)); + call64_from_32(get_regs64); + /*print_regs64();*/ + } + + /* + * On paravirt kernels, flags are not preserved across syscalls. + * Thus, we do not consider it a bug if some are changed. + * We just show ones which do. + */ + if ((0x200ed7 ^ flags) != 0) { + print_flags("[WARN]\tFlags before", 0x200ed7); + print_flags("[WARN]\tFlags after", flags); + print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags)); + } + + if (bad_arg) { + printf("[FAIL]\targ#%ld clobbered\n", bad_arg); + return 1; + } + printf("[OK]\tArguments are preserved across syscall\n"); + + return check_regs64(); +} + +int run_syscall_twice() +{ + int exitcode = 0; + long sv; + + if (syscall_addr) { + printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n"); + exitcode = run_syscall(); + } + sv = syscall_addr; + syscall_addr = (long)&int80; + printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n"); + exitcode += run_syscall(); + syscall_addr = sv; + return exitcode; +} + +void ptrace_me() +{ + pid_t pid; + + fflush(NULL); + pid = fork(); + if (pid < 0) + exit(1); + if (pid == 0) { + /* child */ + if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0) + exit(0); + raise(SIGSTOP); + return; + } + /* parent */ + printf("[RUN]\tRunning tests under ptrace\n"); + while (1) { + int status; + pid = waitpid(-1, &status, __WALL); + if (WIFEXITED(status)) + exit(WEXITSTATUS(status)); + if (WIFSIGNALED(status)) + exit(WTERMSIG(status)); + if (pid <= 0 || !WIFSTOPPED(status)) /* paranoia */ + exit(255); + /* + * Note: we do not inject sig = WSTOPSIG(status). + * We probably should, but careful: do not inject SIGTRAP + * generated by syscall entry/exit stops. + * That kills the child. + */ + ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/); + } +} + +int main(int argc, char **argv, char **envp) +{ + int exitcode = 0; + int cs; + + asm("\n" + " movl %%cs, %%eax\n" + : "=a" (cs) + ); + kernel_is_64bit = (cs == 0x23); + if (!kernel_is_64bit) + printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n"); + + /* This only works for non-static builds: + * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall"); + */ + syscall_addr = get_syscall(envp); + + exitcode += run_syscall_twice(); + ptrace_me(); + exitcode += run_syscall_twice(); + + return exitcode; +} +#endif diff --git a/kernel/tools/testing/selftests/x86/thunks.S b/kernel/tools/testing/selftests/x86/thunks.S new file mode 100644 index 000000000..ce8a995bb --- /dev/null +++ b/kernel/tools/testing/selftests/x86/thunks.S @@ -0,0 +1,67 @@ +/* + * thunks.S - assembly helpers for mixed-bitness code + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * These are little helpers that make it easier to switch bitness on + * the fly. + */ + + .text + + .global call32_from_64 + .type call32_from_64, @function +call32_from_64: + // rdi: stack to use + // esi: function to call + + // Save registers + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + + // Switch stacks + mov %rsp,(%rdi) + mov %rdi,%rsp + + // Switch to compatibility mode + pushq $0x23 /* USER32_CS */ + pushq $1f + lretq + +1: + .code32 + // Call the function + call *%esi + // Switch back to long mode + jmp $0x33,$1f + .code64 + +1: + // Restore the stack + mov (%rsp),%rsp + + // Restore registers + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + + ret + +.size call32_from_64, .-call32_from_64 diff --git a/kernel/tools/testing/selftests/x86/thunks_32.S b/kernel/tools/testing/selftests/x86/thunks_32.S new file mode 100644 index 000000000..29b644bb9 --- /dev/null +++ b/kernel/tools/testing/selftests/x86/thunks_32.S @@ -0,0 +1,55 @@ +/* + * thunks_32.S - assembly helpers for mixed-bitness code + * Copyright (c) 2015 Denys Vlasenko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * These are little helpers that make it easier to switch bitness on + * the fly. + */ + + .text + .code32 + + .global call64_from_32 + .type call32_from_64, @function + + // 4(%esp): function to call +call64_from_32: + // Fetch function address + mov 4(%esp), %eax + + // Save registers which are callee-clobbered by 64-bit ABI + push %ecx + push %edx + push %esi + push %edi + + // Switch to long mode + jmp $0x33,$1f +1: .code64 + + // Call the function + call *%rax + + // Switch to compatibility mode + push $0x23 /* USER32_CS */ + .code32; push $1f; .code64 /* hack: can't have X86_64_32S relocation in 32-bit ELF */ + lretq +1: .code32 + + pop %edi + pop %esi + pop %edx + pop %ecx + + ret + +.size call64_from_32, .-call64_from_32 diff --git a/kernel/tools/testing/selftests/x86/trivial_64bit_program.c b/kernel/tools/testing/selftests/x86/trivial_64bit_program.c index b994946c4..05c6a41b3 100644 --- a/kernel/tools/testing/selftests/x86/trivial_64bit_program.c +++ b/kernel/tools/testing/selftests/x86/trivial_64bit_program.c @@ -1,5 +1,5 @@ /* - * Trivial program to check that we have a valid 32-bit build environment. + * Trivial program to check that we have a valid 64-bit build environment. * Copyright (c) 2015 Andy Lutomirski * GPL v2 */ diff --git a/kernel/tools/testing/selftests/x86/unwind_vdso.c b/kernel/tools/testing/selftests/x86/unwind_vdso.c new file mode 100644 index 000000000..00a26a82f --- /dev/null +++ b/kernel/tools/testing/selftests/x86/unwind_vdso.c @@ -0,0 +1,211 @@ +/* + * unwind_vdso.c - tests unwind info for AT_SYSINFO in the vDSO + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This tests __kernel_vsyscall's unwind info. + */ + +#define _GNU_SOURCE + +#include <features.h> +#include <stdio.h> + +#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16 + +int main() +{ + /* We need getauxval(). */ + printf("[SKIP]\tGLIBC before 2.16 cannot compile this test\n"); + return 0; +} + +#else + +#include <sys/time.h> +#include <stdlib.h> +#include <syscall.h> +#include <unistd.h> +#include <string.h> +#include <inttypes.h> +#include <sys/mman.h> +#include <signal.h> +#include <sys/ucontext.h> +#include <err.h> +#include <stddef.h> +#include <stdbool.h> +#include <sys/ptrace.h> +#include <sys/user.h> +#include <sys/ucontext.h> +#include <link.h> +#include <sys/auxv.h> +#include <dlfcn.h> +#include <unwind.h> + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +#ifdef __x86_64__ +# define WIDTH "q" +#else +# define WIDTH "l" +#endif + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH + : : "rm" (eflags) : "flags"); +} + +#define X86_EFLAGS_TF (1UL << 8) + +static volatile sig_atomic_t nerrs; +static unsigned long sysinfo; +static bool got_sysinfo = false; +static unsigned long return_address; + +struct unwind_state { + unsigned long ip; /* trap source */ + int depth; /* -1 until we hit the trap source */ +}; + +_Unwind_Reason_Code trace_fn(struct _Unwind_Context * ctx, void *opaque) +{ + struct unwind_state *state = opaque; + unsigned long ip = _Unwind_GetIP(ctx); + + if (state->depth == -1) { + if (ip == state->ip) + state->depth = 0; + else + return _URC_NO_REASON; /* Not there yet */ + } + printf("\t 0x%lx\n", ip); + + if (ip == return_address) { + /* Here we are. */ + unsigned long eax = _Unwind_GetGR(ctx, 0); + unsigned long ecx = _Unwind_GetGR(ctx, 1); + unsigned long edx = _Unwind_GetGR(ctx, 2); + unsigned long ebx = _Unwind_GetGR(ctx, 3); + unsigned long ebp = _Unwind_GetGR(ctx, 5); + unsigned long esi = _Unwind_GetGR(ctx, 6); + unsigned long edi = _Unwind_GetGR(ctx, 7); + bool ok = (eax == SYS_getpid || eax == getpid()) && + ebx == 1 && ecx == 2 && edx == 3 && + esi == 4 && edi == 5 && ebp == 6; + + if (!ok) + nerrs++; + printf("[%s]\t NR = %ld, args = %ld, %ld, %ld, %ld, %ld, %ld\n", + (ok ? "OK" : "FAIL"), + eax, ebx, ecx, edx, esi, edi, ebp); + + return _URC_NORMAL_STOP; + } else { + state->depth++; + return _URC_NO_REASON; + } +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + struct unwind_state state; + unsigned long ip = ctx->uc_mcontext.gregs[REG_EIP]; + + if (!got_sysinfo && ip == sysinfo) { + got_sysinfo = true; + + /* Find the return address. */ + return_address = *(unsigned long *)(unsigned long)ctx->uc_mcontext.gregs[REG_ESP]; + + printf("\tIn vsyscall at 0x%lx, returning to 0x%lx\n", + ip, return_address); + } + + if (!got_sysinfo) + return; /* Not there yet */ + + if (ip == return_address) { + ctx->uc_mcontext.gregs[REG_EFL] &= ~X86_EFLAGS_TF; + printf("\tVsyscall is done\n"); + return; + } + + printf("\tSIGTRAP at 0x%lx\n", ip); + + state.ip = ip; + state.depth = -1; + _Unwind_Backtrace(trace_fn, &state); +} + +int main() +{ + sysinfo = getauxval(AT_SYSINFO); + printf("\tAT_SYSINFO is 0x%lx\n", sysinfo); + + Dl_info info; + if (!dladdr((void *)sysinfo, &info)) { + printf("[WARN]\tdladdr failed on AT_SYSINFO\n"); + } else { + printf("[OK]\tAT_SYSINFO maps to %s, loaded at 0x%p\n", + info.dli_fname, info.dli_fbase); + } + + sethandler(SIGTRAP, sigtrap, 0); + + syscall(SYS_getpid); /* Force symbol binding without TF set. */ + printf("[RUN]\tSet TF and check a fast syscall\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + syscall(SYS_getpid, 1, 2, 3, 4, 5, 6); + if (!got_sysinfo) { + set_eflags(get_eflags() & ~X86_EFLAGS_TF); + + /* + * The most likely cause of this is that you're on Debian or + * a Debian-based distro, you're missing libc6-i686, and you're + * affected by libc/19006 (https://sourceware.org/PR19006). + */ + printf("[WARN]\tsyscall(2) didn't enter AT_SYSINFO\n"); + } + + if (get_eflags() & X86_EFLAGS_TF) { + printf("[FAIL]\tTF is still set\n"); + nerrs++; + } + + if (nerrs) { + printf("[FAIL]\tThere were errors\n"); + return 1; + } else { + printf("[OK]\tAll is well\n"); + return 0; + } +} + +#endif /* New enough libc */ diff --git a/kernel/tools/testing/selftests/zram/Makefile b/kernel/tools/testing/selftests/zram/Makefile new file mode 100644 index 000000000..29d80346e --- /dev/null +++ b/kernel/tools/testing/selftests/zram/Makefile @@ -0,0 +1,9 @@ +all: + +TEST_PROGS := zram.sh +TEST_FILES := zram01.sh zram02.sh zram_lib.sh + +include ../lib.mk + +clean: + $(RM) err.log diff --git a/kernel/tools/testing/selftests/zram/README b/kernel/tools/testing/selftests/zram/README new file mode 100644 index 000000000..eb17917c8 --- /dev/null +++ b/kernel/tools/testing/selftests/zram/README @@ -0,0 +1,40 @@ +zram: Compressed RAM based block devices +---------------------------------------- +* Introduction + +The zram module creates RAM based block devices named /dev/zram<id> +(<id> = 0, 1, ...). Pages written to these disks are compressed and stored +in memory itself. These disks allow very fast I/O and compression provides +good amounts of memory savings. Some of the usecases include /tmp storage, +use as swap disks, various caches under /var and maybe many more :) + +Statistics for individual zram devices are exported through sysfs nodes at +/sys/block/zram<id>/ + +Kconfig required: +CONFIG_ZRAM=y +CONFIG_ZRAM_LZ4_COMPRESS=y +CONFIG_ZPOOL=y +CONFIG_ZSMALLOC=y + +ZRAM Testcases +-------------- +zram_lib.sh: create library with initialization/cleanup functions +zram.sh: For sanity check of CONFIG_ZRAM and to run zram01 and zram02 + +Two functional tests: zram01 and zram02: +zram01.sh: creates general purpose ram disks with ext4 filesystems +zram02.sh: creates block device for swap + +Commands required for testing: + - bc + - dd + - free + - awk + - mkswap + - swapon + - swapoff + - mkfs/ mkfs.ext4 + +For more information please refer: +kernel-source-tree/Documentation/blockdev/zram.txt diff --git a/kernel/tools/testing/selftests/zram/zram.sh b/kernel/tools/testing/selftests/zram/zram.sh new file mode 100755 index 000000000..683a292e3 --- /dev/null +++ b/kernel/tools/testing/selftests/zram/zram.sh @@ -0,0 +1,27 @@ +#!/bin/bash +TCID="zram.sh" + +. ./zram_lib.sh + +run_zram () { +echo "--------------------" +echo "running zram tests" +echo "--------------------" +./zram01.sh +echo "" +./zram02.sh +} + +check_prereqs + +# check zram module exists +MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko +if [ -f $MODULE_PATH ]; then + run_zram +elif [ -b /dev/zram0 ]; then + run_zram +else + echo "$TCID : No zram.ko module or /dev/zram0 device file not found" + echo "$TCID : CONFIG_ZRAM is not set" + exit 1 +fi diff --git a/kernel/tools/testing/selftests/zram/zram01.sh b/kernel/tools/testing/selftests/zram/zram01.sh new file mode 100755 index 000000000..b9566a647 --- /dev/null +++ b/kernel/tools/testing/selftests/zram/zram01.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Test creates several zram devices with different filesystems on them. +# It fills each device with zeros and checks that compression works. +# +# Author: Alexey Kodanev <alexey.kodanev@oracle.com> +# Modified: Naresh Kamboju <naresh.kamboju@linaro.org> + +TCID="zram01" +ERR_CODE=0 + +. ./zram_lib.sh + +# Test will create the following number of zram devices: +dev_num=1 +# This is a list of parameters for zram devices. +# Number of items must be equal to 'dev_num' parameter. +zram_max_streams="2" + +# The zram sysfs node 'disksize' value can be either in bytes, +# or you can use mem suffixes. But in some old kernels, mem +# suffixes are not supported, for example, in RHEL6.6GA's kernel +# layer, it uses strict_strtoull() to parse disksize which does +# not support mem suffixes, in some newer kernels, they use +# memparse() which supports mem suffixes. So here we just use +# bytes to make sure everything works correctly. +zram_sizes="2097152" # 2MB +zram_mem_limits="2M" +zram_filesystems="ext4" +zram_algs="lzo" + +zram_fill_fs() +{ + local mem_free0=$(free -m | awk 'NR==2 {print $4}') + + for i in $(seq 0 $(($dev_num - 1))); do + echo "fill zram$i..." + local b=0 + while [ true ]; do + dd conv=notrunc if=/dev/zero of=zram${i}/file \ + oflag=append count=1 bs=1024 status=none \ + > /dev/null 2>&1 || break + b=$(($b + 1)) + done + echo "zram$i can be filled with '$b' KB" + done + + local mem_free1=$(free -m | awk 'NR==2 {print $4}') + local used_mem=$(($mem_free0 - $mem_free1)) + + local total_size=0 + for sm in $zram_sizes; do + local s=$(echo $sm | sed 's/M//') + total_size=$(($total_size + $s)) + done + + echo "zram used ${used_mem}M, zram disk sizes ${total_size}M" + + local v=$((100 * $total_size / $used_mem)) + + if [ "$v" -lt 100 ]; then + echo "FAIL compression ratio: 0.$v:1" + ERR_CODE=-1 + zram_cleanup + return + fi + + echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" +} + +check_prereqs +zram_load +zram_max_streams +zram_compress_alg +zram_set_disksizes +zram_set_memlimit +zram_makefs +zram_mount + +zram_fill_fs +zram_cleanup +zram_unload + +if [ $ERR_CODE -ne 0 ]; then + echo "$TCID : [FAIL]" +else + echo "$TCID : [PASS]" +fi diff --git a/kernel/tools/testing/selftests/zram/zram02.sh b/kernel/tools/testing/selftests/zram/zram02.sh new file mode 100755 index 000000000..74569b883 --- /dev/null +++ b/kernel/tools/testing/selftests/zram/zram02.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Test checks that we can create swap zram device. +# +# Author: Alexey Kodanev <alexey.kodanev@oracle.com> +# Modified: Naresh Kamboju <naresh.kamboju@linaro.org> + +TCID="zram02" +ERR_CODE=0 + +. ./zram_lib.sh + +# Test will create the following number of zram devices: +dev_num=1 +# This is a list of parameters for zram devices. +# Number of items must be equal to 'dev_num' parameter. +zram_max_streams="2" + +# The zram sysfs node 'disksize' value can be either in bytes, +# or you can use mem suffixes. But in some old kernels, mem +# suffixes are not supported, for example, in RHEL6.6GA's kernel +# layer, it uses strict_strtoull() to parse disksize which does +# not support mem suffixes, in some newer kernels, they use +# memparse() which supports mem suffixes. So here we just use +# bytes to make sure everything works correctly. +zram_sizes="1048576" # 1M +zram_mem_limits="1M" + +check_prereqs +zram_load +zram_max_streams +zram_set_disksizes +zram_set_memlimit +zram_makeswap +zram_swapoff +zram_cleanup +zram_unload + +if [ $ERR_CODE -ne 0 ]; then + echo "$TCID : [FAIL]" +else + echo "$TCID : [PASS]" +fi diff --git a/kernel/tools/testing/selftests/zram/zram_lib.sh b/kernel/tools/testing/selftests/zram/zram_lib.sh new file mode 100755 index 000000000..f6a9c73e7 --- /dev/null +++ b/kernel/tools/testing/selftests/zram/zram_lib.sh @@ -0,0 +1,233 @@ +#!/bin/sh +# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Author: Alexey Kodanev <alexey.kodanev@oracle.com> +# Modified: Naresh Kamboju <naresh.kamboju@linaro.org> + +MODULE=0 +dev_makeswap=-1 +dev_mounted=-1 + +trap INT + +check_prereqs() +{ + local msg="skip all tests:" + local uid=$(id -u) + + if [ $uid -ne 0 ]; then + echo $msg must be run as root >&2 + exit 0 + fi +} + +zram_cleanup() +{ + echo "zram cleanup" + local i= + for i in $(seq 0 $dev_makeswap); do + swapoff /dev/zram$i + done + + for i in $(seq 0 $dev_mounted); do + umount /dev/zram$i + done + + for i in $(seq 0 $(($dev_num - 1))); do + echo 1 > /sys/block/zram${i}/reset + rm -rf zram$i + done + +} + +zram_unload() +{ + if [ $MODULE -ne 0 ] ; then + echo "zram rmmod zram" + rmmod zram > /dev/null 2>&1 + fi +} + +zram_load() +{ + # check zram module exists + MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko + if [ -f $MODULE_PATH ]; then + MODULE=1 + echo "create '$dev_num' zram device(s)" + modprobe zram num_devices=$dev_num + if [ $? -ne 0 ]; then + echo "failed to insert zram module" + exit 1 + fi + + dev_num_created=$(ls /dev/zram* | wc -w) + + if [ "$dev_num_created" -ne "$dev_num" ]; then + echo "unexpected num of devices: $dev_num_created" + ERR_CODE=-1 + else + echo "zram load module successful" + fi + elif [ -b /dev/zram0 ]; then + echo "/dev/zram0 device file found: OK" + else + echo "ERROR: No zram.ko module or no /dev/zram0 device found" + echo "$TCID : CONFIG_ZRAM is not set" + exit 1 + fi +} + +zram_max_streams() +{ + echo "set max_comp_streams to zram device(s)" + + local i=0 + for max_s in $zram_max_streams; do + local sys_path="/sys/block/zram${i}/max_comp_streams" + echo $max_s > $sys_path || \ + echo "FAIL failed to set '$max_s' to $sys_path" + sleep 1 + local max_streams=$(cat $sys_path) + + [ "$max_s" -ne "$max_streams" ] && \ + echo "FAIL can't set max_streams '$max_s', get $max_stream" + + i=$(($i + 1)) + echo "$sys_path = '$max_streams' ($i/$dev_num)" + done + + echo "zram max streams: OK" +} + +zram_compress_alg() +{ + echo "test that we can set compression algorithm" + + local algs=$(cat /sys/block/zram0/comp_algorithm) + echo "supported algs: $algs" + local i=0 + for alg in $zram_algs; do + local sys_path="/sys/block/zram${i}/comp_algorithm" + echo "$alg" > $sys_path || \ + echo "FAIL can't set '$alg' to $sys_path" + i=$(($i + 1)) + echo "$sys_path = '$alg' ($i/$dev_num)" + done + + echo "zram set compression algorithm: OK" +} + +zram_set_disksizes() +{ + echo "set disk size to zram device(s)" + local i=0 + for ds in $zram_sizes; do + local sys_path="/sys/block/zram${i}/disksize" + echo "$ds" > $sys_path || \ + echo "FAIL can't set '$ds' to $sys_path" + + i=$(($i + 1)) + echo "$sys_path = '$ds' ($i/$dev_num)" + done + + echo "zram set disksizes: OK" +} + +zram_set_memlimit() +{ + echo "set memory limit to zram device(s)" + + local i=0 + for ds in $zram_mem_limits; do + local sys_path="/sys/block/zram${i}/mem_limit" + echo "$ds" > $sys_path || \ + echo "FAIL can't set '$ds' to $sys_path" + + i=$(($i + 1)) + echo "$sys_path = '$ds' ($i/$dev_num)" + done + + echo "zram set memory limit: OK" +} + +zram_makeswap() +{ + echo "make swap with zram device(s)" + local i=0 + for i in $(seq 0 $(($dev_num - 1))); do + mkswap /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL mkswap /dev/zram$1 failed" + fi + + swapon /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL swapon /dev/zram$1 failed" + fi + + echo "done with /dev/zram$i" + dev_makeswap=$i + done + + echo "zram making zram mkswap and swapon: OK" +} + +zram_swapoff() +{ + local i= + for i in $(seq 0 $dev_makeswap); do + swapoff /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL swapoff /dev/zram$i failed" + fi + done + dev_makeswap=-1 + + echo "zram swapoff: OK" +} + +zram_makefs() +{ + local i=0 + for fs in $zram_filesystems; do + # if requested fs not supported default it to ext2 + which mkfs.$fs > /dev/null 2>&1 || fs=ext2 + + echo "make $fs filesystem on /dev/zram$i" + mkfs.$fs /dev/zram$i > err.log 2>&1 + if [ $? -ne 0 ]; then + cat err.log + echo "FAIL failed to make $fs on /dev/zram$i" + fi + i=$(($i + 1)) + echo "zram mkfs.$fs: OK" + done +} + +zram_mount() +{ + local i=0 + for i in $(seq 0 $(($dev_num - 1))); do + echo "mount /dev/zram$i" + mkdir zram$i + mount /dev/zram$i zram$i > /dev/null || \ + echo "FAIL mount /dev/zram$i failed" + dev_mounted=$i + done + + echo "zram mount of zram device(s): OK" +} |