summaryrefslogtreecommitdiffstats
path: root/kernel/drivers/block/zram/zram_drv.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/drivers/block/zram/zram_drv.c')
-rw-r--r--kernel/drivers/block/zram/zram_drv.c1024
1 files changed, 578 insertions, 446 deletions
diff --git a/kernel/drivers/block/zram/zram_drv.c b/kernel/drivers/block/zram/zram_drv.c
index 6e134f475..65e0b375a 100644
--- a/kernel/drivers/block/zram/zram_drv.c
+++ b/kernel/drivers/block/zram/zram_drv.c
@@ -15,10 +15,6 @@
#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-#ifdef CONFIG_ZRAM_DEBUG
-#define DEBUG
-#endif
-
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
@@ -32,12 +28,16 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/sysfs.h>
#include "zram_drv.h"
-/* Globals */
+static DEFINE_IDR(zram_index_idr);
+/* idr index must be protected */
+static DEFINE_MUTEX(zram_index_mutex);
+
static int zram_major;
-static struct zram *zram_devices;
static const char *default_compressor = "lzo";
/* Module params (documentation at end) */
@@ -53,7 +53,7 @@ static inline void deprecated_attr_warn(const char *name)
}
#define ZRAM_ATTR_RO(name) \
-static ssize_t name##_show(struct device *d, \
+static ssize_t name##_show(struct device *d, \
struct device_attribute *attr, char *b) \
{ \
struct zram *zram = dev_to_zram(d); \
@@ -74,33 +74,117 @@ static inline struct zram *dev_to_zram(struct device *dev)
return (struct zram *)dev_to_disk(dev)->private_data;
}
-static ssize_t compact_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+/* flag operations require table entry bit_spin_lock() being held */
+static int zram_test_flag(struct zram_meta *meta, u32 index,
+ enum zram_pageflags flag)
{
- unsigned long nr_migrated;
- struct zram *zram = dev_to_zram(dev);
- struct zram_meta *meta;
+ return meta->table[index].value & BIT(flag);
+}
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
- return -EINVAL;
- }
+static void zram_set_flag(struct zram_meta *meta, u32 index,
+ enum zram_pageflags flag)
+{
+ meta->table[index].value |= BIT(flag);
+}
- meta = zram->meta;
- nr_migrated = zs_compact(meta->mem_pool);
- atomic64_add(nr_migrated, &zram->stats.num_migrated);
- up_read(&zram->init_lock);
+static void zram_clear_flag(struct zram_meta *meta, u32 index,
+ enum zram_pageflags flag)
+{
+ meta->table[index].value &= ~BIT(flag);
+}
- return len;
+static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+{
+ return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}
-static ssize_t disksize_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static void zram_set_obj_size(struct zram_meta *meta,
+ u32 index, size_t size)
{
- struct zram *zram = dev_to_zram(dev);
+ unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
- return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
+ meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+}
+
+static inline bool is_partial_io(struct bio_vec *bvec)
+{
+ return bvec->bv_len != PAGE_SIZE;
+}
+
+/*
+ * Check if request is within bounds and aligned on zram logical blocks.
+ */
+static inline bool valid_io_request(struct zram *zram,
+ sector_t start, unsigned int size)
+{
+ u64 end, bound;
+
+ /* unaligned request */
+ if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
+ return false;
+ if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
+ return false;
+
+ end = start + (size >> SECTOR_SHIFT);
+ bound = zram->disksize >> SECTOR_SHIFT;
+ /* out of range range */
+ if (unlikely(start >= bound || end > bound || start > end))
+ return false;
+
+ /* I/O request is valid */
+ return true;
+}
+
+static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+{
+ if (*offset + bvec->bv_len >= PAGE_SIZE)
+ (*index)++;
+ *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
+}
+
+static inline void update_used_max(struct zram *zram,
+ const unsigned long pages)
+{
+ unsigned long old_max, cur_max;
+
+ old_max = atomic_long_read(&zram->stats.max_used_pages);
+
+ do {
+ cur_max = old_max;
+ if (pages > cur_max)
+ old_max = atomic_long_cmpxchg(
+ &zram->stats.max_used_pages, cur_max, pages);
+ } while (old_max != cur_max);
+}
+
+static bool page_zero_filled(void *ptr)
+{
+ unsigned int pos;
+ unsigned long *page;
+
+ page = (unsigned long *)ptr;
+
+ for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
+ if (page[pos])
+ return false;
+ }
+
+ return true;
+}
+
+static void handle_zero_page(struct bio_vec *bvec)
+{
+ struct page *page = bvec->bv_page;
+ void *user_mem;
+
+ user_mem = kmap_atomic(page);
+ if (is_partial_io(bvec))
+ memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
+ else
+ clear_page(user_mem);
+ kunmap_atomic(user_mem);
+
+ flush_dcache_page(page);
}
static ssize_t initstate_show(struct device *dev,
@@ -116,6 +200,14 @@ static ssize_t initstate_show(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
+static ssize_t disksize_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
+}
+
static ssize_t orig_data_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -143,19 +235,6 @@ static ssize_t mem_used_total_show(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}
-static ssize_t max_comp_streams_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- int val;
- struct zram *zram = dev_to_zram(dev);
-
- down_read(&zram->init_lock);
- val = zram->max_comp_streams;
- up_read(&zram->init_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%d\n", val);
-}
-
static ssize_t mem_limit_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -225,6 +304,19 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}
+static ssize_t max_comp_streams_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int val;
+ struct zram *zram = dev_to_zram(dev);
+
+ down_read(&zram->init_lock);
+ val = zram->max_comp_streams;
+ up_read(&zram->init_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
static ssize_t max_comp_streams_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
@@ -271,6 +363,11 @@ static ssize_t comp_algorithm_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
+ size_t sz;
+
+ if (!zcomp_available_algorithm(buf))
+ return -EINVAL;
+
down_write(&zram->init_lock);
if (init_done(zram)) {
up_write(&zram->init_lock);
@@ -278,69 +375,108 @@ static ssize_t comp_algorithm_store(struct device *dev,
return -EBUSY;
}
strlcpy(zram->compressor, buf, sizeof(zram->compressor));
+
+ /* ignore trailing newline */
+ sz = strlen(zram->compressor);
+ if (sz > 0 && zram->compressor[sz - 1] == '\n')
+ zram->compressor[sz - 1] = 0x00;
+
up_write(&zram->init_lock);
return len;
}
-/* flag operations needs meta->tb_lock */
-static int zram_test_flag(struct zram_meta *meta, u32 index,
- enum zram_pageflags flag)
+static ssize_t compact_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
- return meta->table[index].value & BIT(flag);
-}
+ struct zram *zram = dev_to_zram(dev);
+ struct zram_meta *meta;
-static void zram_set_flag(struct zram_meta *meta, u32 index,
- enum zram_pageflags flag)
-{
- meta->table[index].value |= BIT(flag);
-}
+ down_read(&zram->init_lock);
+ if (!init_done(zram)) {
+ up_read(&zram->init_lock);
+ return -EINVAL;
+ }
-static void zram_clear_flag(struct zram_meta *meta, u32 index,
- enum zram_pageflags flag)
-{
- meta->table[index].value &= ~BIT(flag);
+ meta = zram->meta;
+ zs_compact(meta->mem_pool);
+ up_read(&zram->init_lock);
+
+ return len;
}
-static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+static ssize_t io_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+ struct zram *zram = dev_to_zram(dev);
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+ ret = scnprintf(buf, PAGE_SIZE,
+ "%8llu %8llu %8llu %8llu\n",
+ (u64)atomic64_read(&zram->stats.failed_reads),
+ (u64)atomic64_read(&zram->stats.failed_writes),
+ (u64)atomic64_read(&zram->stats.invalid_io),
+ (u64)atomic64_read(&zram->stats.notify_free));
+ up_read(&zram->init_lock);
+
+ return ret;
}
-static void zram_set_obj_size(struct zram_meta *meta,
- u32 index, size_t size)
+static ssize_t mm_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+ struct zram *zram = dev_to_zram(dev);
+ struct zs_pool_stats pool_stats;
+ u64 orig_size, mem_used = 0;
+ long max_used;
+ ssize_t ret;
- meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+ memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
+
+ down_read(&zram->init_lock);
+ if (init_done(zram)) {
+ mem_used = zs_get_total_pages(zram->meta->mem_pool);
+ zs_pool_stats(zram->meta->mem_pool, &pool_stats);
+ }
+
+ orig_size = atomic64_read(&zram->stats.pages_stored);
+ max_used = atomic_long_read(&zram->stats.max_used_pages);
+
+ ret = scnprintf(buf, PAGE_SIZE,
+ "%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
+ orig_size << PAGE_SHIFT,
+ (u64)atomic64_read(&zram->stats.compr_data_size),
+ mem_used << PAGE_SHIFT,
+ zram->limit_pages << PAGE_SHIFT,
+ max_used << PAGE_SHIFT,
+ (u64)atomic64_read(&zram->stats.zero_pages),
+ pool_stats.pages_compacted);
+ up_read(&zram->init_lock);
+
+ return ret;
}
-static inline int is_partial_io(struct bio_vec *bvec)
+static DEVICE_ATTR_RO(io_stat);
+static DEVICE_ATTR_RO(mm_stat);
+ZRAM_ATTR_RO(num_reads);
+ZRAM_ATTR_RO(num_writes);
+ZRAM_ATTR_RO(failed_reads);
+ZRAM_ATTR_RO(failed_writes);
+ZRAM_ATTR_RO(invalid_io);
+ZRAM_ATTR_RO(notify_free);
+ZRAM_ATTR_RO(zero_pages);
+ZRAM_ATTR_RO(compr_data_size);
+
+static inline bool zram_meta_get(struct zram *zram)
{
- return bvec->bv_len != PAGE_SIZE;
+ if (atomic_inc_not_zero(&zram->refcount))
+ return true;
+ return false;
}
-/*
- * Check if request is within bounds and aligned on zram logical blocks.
- */
-static inline int valid_io_request(struct zram *zram,
- sector_t start, unsigned int size)
+static inline void zram_meta_put(struct zram *zram)
{
- u64 end, bound;
-
- /* unaligned request */
- if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
- return 0;
- if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
- return 0;
-
- end = start + (size >> SECTOR_SHIFT);
- bound = zram->disksize >> SECTOR_SHIFT;
- /* out of range range */
- if (unlikely(start >= bound || end > bound || start > end))
- return 0;
-
- /* I/O request is valid */
- return 1;
+ atomic_dec(&zram->refcount);
}
static void zram_meta_free(struct zram_meta *meta, u64 disksize)
@@ -363,10 +499,9 @@ static void zram_meta_free(struct zram_meta *meta, u64 disksize)
kfree(meta);
}
-static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
+static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
{
size_t num_pages;
- char pool_name[8];
struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
if (!meta)
@@ -379,13 +514,14 @@ static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
goto out_error;
}
- snprintf(pool_name, sizeof(pool_name), "zram%d", device_id);
meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM);
if (!meta->mem_pool) {
pr_err("Error creating memory pool\n");
goto out_error;
}
+ zram_meta_init_table_locks(meta, disksize);
+
return meta;
out_error:
@@ -394,56 +530,6 @@ out_error:
return NULL;
}
-static inline bool zram_meta_get(struct zram *zram)
-{
- if (atomic_inc_not_zero(&zram->refcount))
- return true;
- return false;
-}
-
-static inline void zram_meta_put(struct zram *zram)
-{
- atomic_dec(&zram->refcount);
-}
-
-static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
-{
- if (*offset + bvec->bv_len >= PAGE_SIZE)
- (*index)++;
- *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
-}
-
-static int page_zero_filled(void *ptr)
-{
- unsigned int pos;
- unsigned long *page;
-
- page = (unsigned long *)ptr;
-
- for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
- if (page[pos])
- return 0;
- }
-
- return 1;
-}
-
-static void handle_zero_page(struct bio_vec *bvec)
-{
- struct page *page = bvec->bv_page;
- void *user_mem;
-
- user_mem = kmap_atomic(page);
- if (is_partial_io(bvec))
- memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
- else
- clear_page(user_mem);
- kunmap_atomic(user_mem);
-
- flush_dcache_page(page);
-}
-
-
/*
* To protect concurrent access to the same index entry,
* caller should hold this table index entry's bit_spinlock to
@@ -484,12 +570,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
unsigned long handle;
size_t size;
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_lock_table(&meta->table[index]);
handle = meta->table[index].handle;
size = zram_get_obj_size(meta, index);
if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_unlock_table(&meta->table[index]);
clear_page(mem);
return 0;
}
@@ -500,7 +586,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
else
ret = zcomp_decompress(zram->comp, cmem, size, mem);
zs_unmap_object(meta->mem_pool, handle);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_unlock_table(&meta->table[index]);
/* Should NEVER happen. Return bio error if it does. */
if (unlikely(ret)) {
@@ -520,14 +606,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
struct zram_meta *meta = zram->meta;
page = bvec->bv_page;
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_lock_table(&meta->table[index]);
if (unlikely(!meta->table[index].handle) ||
zram_test_flag(meta, index, ZRAM_ZERO)) {
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_unlock_table(&meta->table[index]);
handle_zero_page(bvec);
return 0;
}
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_unlock_table(&meta->table[index]);
if (is_partial_io(bvec))
/* Use a temporary buffer to decompress the page */
@@ -538,7 +624,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
uncmem = user_mem;
if (!uncmem) {
- pr_info("Unable to allocate temp memory\n");
+ pr_err("Unable to allocate temp memory\n");
ret = -ENOMEM;
goto out_cleanup;
}
@@ -561,21 +647,6 @@ out_cleanup:
return ret;
}
-static inline void update_used_max(struct zram *zram,
- const unsigned long pages)
-{
- unsigned long old_max, cur_max;
-
- old_max = atomic_long_read(&zram->stats.max_used_pages);
-
- do {
- cur_max = old_max;
- if (pages > cur_max)
- old_max = atomic_long_cmpxchg(
- &zram->stats.max_used_pages, cur_max, pages);
- } while (old_max != cur_max);
-}
-
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset)
{
@@ -585,8 +656,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
struct page *page;
unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
struct zram_meta *meta = zram->meta;
- struct zcomp_strm *zstrm;
- bool locked = false;
+ struct zcomp_strm *zstrm = NULL;
unsigned long alloced_pages;
page = bvec->bv_page;
@@ -606,7 +676,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
}
zstrm = zcomp_strm_find(zram->comp);
- locked = true;
user_mem = kmap_atomic(page);
if (is_partial_io(bvec)) {
@@ -622,10 +691,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
if (user_mem)
kunmap_atomic(user_mem);
/* Free memory associated with this sector now. */
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_lock_table(&meta->table[index]);
zram_free_page(zram, index);
zram_set_flag(meta, index, ZRAM_ZERO);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_unlock_table(&meta->table[index]);
atomic64_inc(&zram->stats.zero_pages);
ret = 0;
@@ -652,21 +721,21 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
handle = zs_malloc(meta->mem_pool, clen);
if (!handle) {
- pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
+ pr_err("Error allocating memory for compressed page: %u, size=%zu\n",
index, clen);
ret = -ENOMEM;
goto out;
}
alloced_pages = zs_get_total_pages(meta->mem_pool);
+ update_used_max(zram, alloced_pages);
+
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
zs_free(meta->mem_pool, handle);
ret = -ENOMEM;
goto out;
}
- update_used_max(zram, alloced_pages);
-
cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
@@ -678,60 +747,31 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
}
zcomp_strm_release(zram->comp, zstrm);
- locked = false;
+ zstrm = NULL;
zs_unmap_object(meta->mem_pool, handle);
/*
* Free memory associated with this sector
* before overwriting unused sectors.
*/
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_lock_table(&meta->table[index]);
zram_free_page(zram, index);
meta->table[index].handle = handle;
zram_set_obj_size(meta, index, clen);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_unlock_table(&meta->table[index]);
/* Update stats */
atomic64_add(clen, &zram->stats.compr_data_size);
atomic64_inc(&zram->stats.pages_stored);
out:
- if (locked)
+ if (zstrm)
zcomp_strm_release(zram->comp, zstrm);
if (is_partial_io(bvec))
kfree(uncmem);
return ret;
}
-static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, int rw)
-{
- unsigned long start_time = jiffies;
- int ret;
-
- generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
- &zram->disk->part0);
-
- if (rw == READ) {
- atomic64_inc(&zram->stats.num_reads);
- ret = zram_bvec_read(zram, bvec, index, offset);
- } else {
- atomic64_inc(&zram->stats.num_writes);
- ret = zram_bvec_write(zram, bvec, index, offset);
- }
-
- generic_end_io_acct(rw, &zram->disk->part0, start_time);
-
- if (unlikely(ret)) {
- if (rw == READ)
- atomic64_inc(&zram->stats.failed_reads);
- else
- atomic64_inc(&zram->stats.failed_writes);
- }
-
- return ret;
-}
-
/*
* zram_bio_discard - handler on discard request
* @index: physical block index in PAGE_SIZE units
@@ -762,160 +802,41 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
while (n >= PAGE_SIZE) {
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_lock_table(&meta->table[index]);
zram_free_page(zram, index);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_unlock_table(&meta->table[index]);
atomic64_inc(&zram->stats.notify_free);
index++;
n -= PAGE_SIZE;
}
}
-static void zram_reset_device(struct zram *zram)
-{
- struct zram_meta *meta;
- struct zcomp *comp;
- u64 disksize;
-
- down_write(&zram->init_lock);
-
- zram->limit_pages = 0;
-
- if (!init_done(zram)) {
- up_write(&zram->init_lock);
- return;
- }
-
- meta = zram->meta;
- comp = zram->comp;
- disksize = zram->disksize;
- /*
- * Refcount will go down to 0 eventually and r/w handler
- * cannot handle further I/O so it will bail out by
- * check zram_meta_get.
- */
- zram_meta_put(zram);
- /*
- * We want to free zram_meta in process context to avoid
- * deadlock between reclaim path and any other locks.
- */
- wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
-
- /* Reset stats */
- memset(&zram->stats, 0, sizeof(zram->stats));
- zram->disksize = 0;
- zram->max_comp_streams = 1;
-
- set_capacity(zram->disk, 0);
- part_stat_set_all(&zram->disk->part0, 0);
-
- up_write(&zram->init_lock);
- /* I/O operation under all of CPU are done so let's free */
- zram_meta_free(meta, disksize);
- zcomp_destroy(comp);
-}
-
-static ssize_t disksize_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- u64 disksize;
- struct zcomp *comp;
- struct zram_meta *meta;
- struct zram *zram = dev_to_zram(dev);
- int err;
-
- disksize = memparse(buf, NULL);
- if (!disksize)
- return -EINVAL;
-
- disksize = PAGE_ALIGN(disksize);
- meta = zram_meta_alloc(zram->disk->first_minor, disksize);
- if (!meta)
- return -ENOMEM;
-
- comp = zcomp_create(zram->compressor, zram->max_comp_streams);
- if (IS_ERR(comp)) {
- pr_info("Cannot initialise %s compressing backend\n",
- zram->compressor);
- err = PTR_ERR(comp);
- goto out_free_meta;
- }
-
- down_write(&zram->init_lock);
- if (init_done(zram)) {
- pr_info("Cannot change disksize for initialized device\n");
- err = -EBUSY;
- goto out_destroy_comp;
- }
-
- init_waitqueue_head(&zram->io_done);
- atomic_set(&zram->refcount, 1);
- zram->meta = meta;
- zram->comp = comp;
- zram->disksize = disksize;
- set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
- up_write(&zram->init_lock);
-
- /*
- * Revalidate disk out of the init_lock to avoid lockdep splat.
- * It's okay because disk's capacity is protected by init_lock
- * so that revalidate_disk always sees up-to-date capacity.
- */
- revalidate_disk(zram->disk);
-
- return len;
-
-out_destroy_comp:
- up_write(&zram->init_lock);
- zcomp_destroy(comp);
-out_free_meta:
- zram_meta_free(meta, disksize);
- return err;
-}
-
-static ssize_t reset_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
+ int offset, int rw)
{
+ unsigned long start_time = jiffies;
int ret;
- unsigned short do_reset;
- struct zram *zram;
- struct block_device *bdev;
- zram = dev_to_zram(dev);
- bdev = bdget_disk(zram->disk, 0);
-
- if (!bdev)
- return -ENOMEM;
+ generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
+ &zram->disk->part0);
- mutex_lock(&bdev->bd_mutex);
- /* Do not reset an active device! */
- if (bdev->bd_openers) {
- ret = -EBUSY;
- goto out;
+ if (rw == READ) {
+ atomic64_inc(&zram->stats.num_reads);
+ ret = zram_bvec_read(zram, bvec, index, offset);
+ } else {
+ atomic64_inc(&zram->stats.num_writes);
+ ret = zram_bvec_write(zram, bvec, index, offset);
}
- ret = kstrtou16(buf, 10, &do_reset);
- if (ret)
- goto out;
+ generic_end_io_acct(rw, &zram->disk->part0, start_time);
- if (!do_reset) {
- ret = -EINVAL;
- goto out;
+ if (unlikely(ret)) {
+ if (rw == READ)
+ atomic64_inc(&zram->stats.failed_reads);
+ else
+ atomic64_inc(&zram->stats.failed_writes);
}
- /* Make sure all pending I/O is finished */
- fsync_bdev(bdev);
- zram_reset_device(zram);
-
- mutex_unlock(&bdev->bd_mutex);
- revalidate_disk(zram->disk);
- bdput(bdev);
-
- return len;
-
-out:
- mutex_unlock(&bdev->bd_mutex);
- bdput(bdev);
return ret;
}
@@ -932,7 +853,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
if (unlikely(bio->bi_rw & REQ_DISCARD)) {
zram_bio_discard(zram, index, offset, bio);
- bio_endio(bio, 0);
+ bio_endio(bio);
return;
}
@@ -965,8 +886,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
update_position(&index, &offset, &bvec);
}
- set_bit(BIO_UPTODATE, &bio->bi_flags);
- bio_endio(bio, 0);
+ bio_endio(bio);
return;
out:
@@ -976,13 +896,15 @@ out:
/*
* Handler function for all zram I/O requests.
*/
-static void zram_make_request(struct request_queue *queue, struct bio *bio)
+static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
struct zram *zram = queue->queuedata;
if (unlikely(!zram_meta_get(zram)))
goto error;
+ blk_queue_split(queue, &bio, queue->bio_split);
+
if (!valid_io_request(zram, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size)) {
atomic64_inc(&zram->stats.invalid_io);
@@ -991,11 +913,12 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
__zram_make_request(zram, bio);
zram_meta_put(zram);
- return;
+ return BLK_QC_T_NONE;
put_zram:
zram_meta_put(zram);
error:
bio_io_error(bio);
+ return BLK_QC_T_NONE;
}
static void zram_slot_free_notify(struct block_device *bdev,
@@ -1007,9 +930,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
zram = bdev->bd_disk->private_data;
meta = zram->meta;
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_lock_table(&meta->table[index]);
zram_free_page(zram, index);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_unlock_table(&meta->table[index]);
atomic64_inc(&zram->stats.notify_free);
}
@@ -1055,80 +978,185 @@ out:
return err;
}
-static const struct block_device_operations zram_devops = {
- .swap_slot_free_notify = zram_slot_free_notify,
- .rw_page = zram_rw_page,
- .owner = THIS_MODULE
-};
+static void zram_reset_device(struct zram *zram)
+{
+ struct zram_meta *meta;
+ struct zcomp *comp;
+ u64 disksize;
-static DEVICE_ATTR_WO(compact);
-static DEVICE_ATTR_RW(disksize);
-static DEVICE_ATTR_RO(initstate);
-static DEVICE_ATTR_WO(reset);
-static DEVICE_ATTR_RO(orig_data_size);
-static DEVICE_ATTR_RO(mem_used_total);
-static DEVICE_ATTR_RW(mem_limit);
-static DEVICE_ATTR_RW(mem_used_max);
-static DEVICE_ATTR_RW(max_comp_streams);
-static DEVICE_ATTR_RW(comp_algorithm);
+ down_write(&zram->init_lock);
-static ssize_t io_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ zram->limit_pages = 0;
+
+ if (!init_done(zram)) {
+ up_write(&zram->init_lock);
+ return;
+ }
+
+ meta = zram->meta;
+ comp = zram->comp;
+ disksize = zram->disksize;
+ /*
+ * Refcount will go down to 0 eventually and r/w handler
+ * cannot handle further I/O so it will bail out by
+ * check zram_meta_get.
+ */
+ zram_meta_put(zram);
+ /*
+ * We want to free zram_meta in process context to avoid
+ * deadlock between reclaim path and any other locks.
+ */
+ wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
+
+ /* Reset stats */
+ memset(&zram->stats, 0, sizeof(zram->stats));
+ zram->disksize = 0;
+ zram->max_comp_streams = 1;
+
+ set_capacity(zram->disk, 0);
+ part_stat_set_all(&zram->disk->part0, 0);
+
+ up_write(&zram->init_lock);
+ /* I/O operation under all of CPU are done so let's free */
+ zram_meta_free(meta, disksize);
+ zcomp_destroy(comp);
+}
+
+static ssize_t disksize_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
+ u64 disksize;
+ struct zcomp *comp;
+ struct zram_meta *meta;
struct zram *zram = dev_to_zram(dev);
- ssize_t ret;
+ int err;
- down_read(&zram->init_lock);
- ret = scnprintf(buf, PAGE_SIZE,
- "%8llu %8llu %8llu %8llu\n",
- (u64)atomic64_read(&zram->stats.failed_reads),
- (u64)atomic64_read(&zram->stats.failed_writes),
- (u64)atomic64_read(&zram->stats.invalid_io),
- (u64)atomic64_read(&zram->stats.notify_free));
- up_read(&zram->init_lock);
+ disksize = memparse(buf, NULL);
+ if (!disksize)
+ return -EINVAL;
- return ret;
+ disksize = PAGE_ALIGN(disksize);
+ meta = zram_meta_alloc(zram->disk->disk_name, disksize);
+ if (!meta)
+ return -ENOMEM;
+
+ comp = zcomp_create(zram->compressor, zram->max_comp_streams);
+ if (IS_ERR(comp)) {
+ pr_err("Cannot initialise %s compressing backend\n",
+ zram->compressor);
+ err = PTR_ERR(comp);
+ goto out_free_meta;
+ }
+
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ pr_info("Cannot change disksize for initialized device\n");
+ err = -EBUSY;
+ goto out_destroy_comp;
+ }
+
+ init_waitqueue_head(&zram->io_done);
+ atomic_set(&zram->refcount, 1);
+ zram->meta = meta;
+ zram->comp = comp;
+ zram->disksize = disksize;
+ set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+ up_write(&zram->init_lock);
+
+ /*
+ * Revalidate disk out of the init_lock to avoid lockdep splat.
+ * It's okay because disk's capacity is protected by init_lock
+ * so that revalidate_disk always sees up-to-date capacity.
+ */
+ revalidate_disk(zram->disk);
+
+ return len;
+
+out_destroy_comp:
+ up_write(&zram->init_lock);
+ zcomp_destroy(comp);
+out_free_meta:
+ zram_meta_free(meta, disksize);
+ return err;
}
-static ssize_t mm_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t reset_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
- struct zram *zram = dev_to_zram(dev);
- u64 orig_size, mem_used = 0;
- long max_used;
- ssize_t ret;
+ int ret;
+ unsigned short do_reset;
+ struct zram *zram;
+ struct block_device *bdev;
- down_read(&zram->init_lock);
- if (init_done(zram))
- mem_used = zs_get_total_pages(zram->meta->mem_pool);
+ ret = kstrtou16(buf, 10, &do_reset);
+ if (ret)
+ return ret;
- orig_size = atomic64_read(&zram->stats.pages_stored);
- max_used = atomic_long_read(&zram->stats.max_used_pages);
+ if (!do_reset)
+ return -EINVAL;
- ret = scnprintf(buf, PAGE_SIZE,
- "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
- orig_size << PAGE_SHIFT,
- (u64)atomic64_read(&zram->stats.compr_data_size),
- mem_used << PAGE_SHIFT,
- zram->limit_pages << PAGE_SHIFT,
- max_used << PAGE_SHIFT,
- (u64)atomic64_read(&zram->stats.zero_pages),
- (u64)atomic64_read(&zram->stats.num_migrated));
- up_read(&zram->init_lock);
+ zram = dev_to_zram(dev);
+ bdev = bdget_disk(zram->disk, 0);
+ if (!bdev)
+ return -ENOMEM;
+
+ mutex_lock(&bdev->bd_mutex);
+ /* Do not reset an active device or claimed device */
+ if (bdev->bd_openers || zram->claim) {
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ return -EBUSY;
+ }
+
+ /* From now on, anyone can't open /dev/zram[0-9] */
+ zram->claim = true;
+ mutex_unlock(&bdev->bd_mutex);
+
+ /* Make sure all the pending I/O are finished */
+ fsync_bdev(bdev);
+ zram_reset_device(zram);
+ revalidate_disk(zram->disk);
+ bdput(bdev);
+
+ mutex_lock(&bdev->bd_mutex);
+ zram->claim = false;
+ mutex_unlock(&bdev->bd_mutex);
+
+ return len;
+}
+
+static int zram_open(struct block_device *bdev, fmode_t mode)
+{
+ int ret = 0;
+ struct zram *zram;
+
+ WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
+
+ zram = bdev->bd_disk->private_data;
+ /* zram was claimed to reset so open request fails */
+ if (zram->claim)
+ ret = -EBUSY;
return ret;
}
-static DEVICE_ATTR_RO(io_stat);
-static DEVICE_ATTR_RO(mm_stat);
-ZRAM_ATTR_RO(num_reads);
-ZRAM_ATTR_RO(num_writes);
-ZRAM_ATTR_RO(failed_reads);
-ZRAM_ATTR_RO(failed_writes);
-ZRAM_ATTR_RO(invalid_io);
-ZRAM_ATTR_RO(notify_free);
-ZRAM_ATTR_RO(zero_pages);
-ZRAM_ATTR_RO(compr_data_size);
+static const struct block_device_operations zram_devops = {
+ .open = zram_open,
+ .swap_slot_free_notify = zram_slot_free_notify,
+ .rw_page = zram_rw_page,
+ .owner = THIS_MODULE
+};
+
+static DEVICE_ATTR_WO(compact);
+static DEVICE_ATTR_RW(disksize);
+static DEVICE_ATTR_RO(initstate);
+static DEVICE_ATTR_WO(reset);
+static DEVICE_ATTR_RO(orig_data_size);
+static DEVICE_ATTR_RO(mem_used_total);
+static DEVICE_ATTR_RW(mem_limit);
+static DEVICE_ATTR_RW(mem_used_max);
+static DEVICE_ATTR_RW(max_comp_streams);
+static DEVICE_ATTR_RW(comp_algorithm);
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
@@ -1158,10 +1186,24 @@ static struct attribute_group zram_disk_attr_group = {
.attrs = zram_disk_attrs,
};
-static int create_device(struct zram *zram, int device_id)
+/*
+ * Allocate and initialize new zram device. the function returns
+ * '>= 0' device_id upon success, and negative value otherwise.
+ */
+static int zram_add(void)
{
+ struct zram *zram;
struct request_queue *queue;
- int ret = -ENOMEM;
+ int ret, device_id;
+
+ zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
+ if (!zram)
+ return -ENOMEM;
+
+ ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
+ if (ret < 0)
+ goto out_free_dev;
+ device_id = ret;
init_rwsem(&zram->init_lock);
@@ -1169,15 +1211,16 @@ static int create_device(struct zram *zram, int device_id)
if (!queue) {
pr_err("Error allocating disk queue for device %d\n",
device_id);
- goto out;
+ ret = -ENOMEM;
+ goto out_free_idr;
}
blk_queue_make_request(queue, zram_make_request);
- /* gendisk structure */
+ /* gendisk structure */
zram->disk = alloc_disk(1);
if (!zram->disk) {
- pr_warn("Error allocating disk structure for device %d\n",
+ pr_err("Error allocating disk structure for device %d\n",
device_id);
ret = -ENOMEM;
goto out_free_queue;
@@ -1206,7 +1249,7 @@ static int create_device(struct zram *zram, int device_id)
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
- zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
+ blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
/*
* zram_bio_discard() will clear all logical blocks if logical block
* size is identical with physical block size(PAGE_SIZE). But if it is
@@ -1226,96 +1269,185 @@ static int create_device(struct zram *zram, int device_id)
ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
&zram_disk_attr_group);
if (ret < 0) {
- pr_warn("Error creating sysfs group");
+ pr_err("Error creating sysfs group for device %d\n",
+ device_id);
goto out_free_disk;
}
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
zram->meta = NULL;
zram->max_comp_streams = 1;
- return 0;
+
+ pr_info("Added device: %s\n", zram->disk->disk_name);
+ return device_id;
out_free_disk:
del_gendisk(zram->disk);
put_disk(zram->disk);
out_free_queue:
blk_cleanup_queue(queue);
-out:
+out_free_idr:
+ idr_remove(&zram_index_idr, device_id);
+out_free_dev:
+ kfree(zram);
return ret;
}
-static void destroy_devices(unsigned int nr)
+static int zram_remove(struct zram *zram)
+{
+ struct block_device *bdev;
+
+ bdev = bdget_disk(zram->disk, 0);
+ if (!bdev)
+ return -ENOMEM;
+
+ mutex_lock(&bdev->bd_mutex);
+ if (bdev->bd_openers || zram->claim) {
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ return -EBUSY;
+ }
+
+ zram->claim = true;
+ mutex_unlock(&bdev->bd_mutex);
+
+ /*
+ * Remove sysfs first, so no one will perform a disksize
+ * store while we destroy the devices. This also helps during
+ * hot_remove -- zram_reset_device() is the last holder of
+ * ->init_lock, no later/concurrent disksize_store() or any
+ * other sysfs handlers are possible.
+ */
+ sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
+ &zram_disk_attr_group);
+
+ /* Make sure all the pending I/O are finished */
+ fsync_bdev(bdev);
+ zram_reset_device(zram);
+ bdput(bdev);
+
+ pr_info("Removed device: %s\n", zram->disk->disk_name);
+
+ blk_cleanup_queue(zram->disk->queue);
+ del_gendisk(zram->disk);
+ put_disk(zram->disk);
+ kfree(zram);
+ return 0;
+}
+
+/* zram-control sysfs attributes */
+static ssize_t hot_add_show(struct class *class,
+ struct class_attribute *attr,
+ char *buf)
+{
+ int ret;
+
+ mutex_lock(&zram_index_mutex);
+ ret = zram_add();
+ mutex_unlock(&zram_index_mutex);
+
+ if (ret < 0)
+ return ret;
+ return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+}
+
+static ssize_t hot_remove_store(struct class *class,
+ struct class_attribute *attr,
+ const char *buf,
+ size_t count)
{
struct zram *zram;
- unsigned int i;
+ int ret, dev_id;
- for (i = 0; i < nr; i++) {
- zram = &zram_devices[i];
- /*
- * Remove sysfs first, so no one will perform a disksize
- * store while we destroy the devices
- */
- sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
- &zram_disk_attr_group);
+ /* dev_id is gendisk->first_minor, which is `int' */
+ ret = kstrtoint(buf, 10, &dev_id);
+ if (ret)
+ return ret;
+ if (dev_id < 0)
+ return -EINVAL;
- zram_reset_device(zram);
+ mutex_lock(&zram_index_mutex);
- blk_cleanup_queue(zram->disk->queue);
- del_gendisk(zram->disk);
- put_disk(zram->disk);
+ zram = idr_find(&zram_index_idr, dev_id);
+ if (zram) {
+ ret = zram_remove(zram);
+ idr_remove(&zram_index_idr, dev_id);
+ } else {
+ ret = -ENODEV;
}
- kfree(zram_devices);
+ mutex_unlock(&zram_index_mutex);
+ return ret ? ret : count;
+}
+
+static struct class_attribute zram_control_class_attrs[] = {
+ __ATTR_RO(hot_add),
+ __ATTR_WO(hot_remove),
+ __ATTR_NULL,
+};
+
+static struct class zram_control_class = {
+ .name = "zram-control",
+ .owner = THIS_MODULE,
+ .class_attrs = zram_control_class_attrs,
+};
+
+static int zram_remove_cb(int id, void *ptr, void *data)
+{
+ zram_remove(ptr);
+ return 0;
+}
+
+static void destroy_devices(void)
+{
+ class_unregister(&zram_control_class);
+ idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
+ idr_destroy(&zram_index_idr);
unregister_blkdev(zram_major, "zram");
- pr_info("Destroyed %u device(s)\n", nr);
}
static int __init zram_init(void)
{
- int ret, dev_id;
+ int ret;
- if (num_devices > max_num_devices) {
- pr_warn("Invalid value for num_devices: %u\n",
- num_devices);
- return -EINVAL;
+ ret = class_register(&zram_control_class);
+ if (ret) {
+ pr_err("Unable to register zram-control class\n");
+ return ret;
}
zram_major = register_blkdev(0, "zram");
if (zram_major <= 0) {
- pr_warn("Unable to get major number\n");
+ pr_err("Unable to get major number\n");
+ class_unregister(&zram_control_class);
return -EBUSY;
}
- /* Allocate the device array and initialize each one */
- zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
- if (!zram_devices) {
- unregister_blkdev(zram_major, "zram");
- return -ENOMEM;
- }
-
- for (dev_id = 0; dev_id < num_devices; dev_id++) {
- ret = create_device(&zram_devices[dev_id], dev_id);
- if (ret)
+ while (num_devices != 0) {
+ mutex_lock(&zram_index_mutex);
+ ret = zram_add();
+ mutex_unlock(&zram_index_mutex);
+ if (ret < 0)
goto out_error;
+ num_devices--;
}
- pr_info("Created %u device(s)\n", num_devices);
return 0;
out_error:
- destroy_devices(dev_id);
+ destroy_devices();
return ret;
}
static void __exit zram_exit(void)
{
- destroy_devices(num_devices);
+ destroy_devices();
}
module_init(zram_init);
module_exit(zram_exit);
module_param(num_devices, uint, 0);
-MODULE_PARM_DESC(num_devices, "Number of zram devices");
+MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");